From 429befa1f2cf1665c88a9bc6d7f344231416d93c Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Thu, 3 Dec 2020 12:44:15 -0800 Subject: [PATCH 1/9] Checkpoint --- larky/src/test/resources/test_unittest.star | 34 +-------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/larky/src/test/resources/test_unittest.star b/larky/src/test/resources/test_unittest.star index befb3bb49..bd98879fe 100644 --- a/larky/src/test/resources/test_unittest.star +++ b/larky/src/test/resources/test_unittest.star @@ -22,36 +22,4 @@ def suite(): runner = unittest.TextTestRunner() -runner.run(suite()) - - - - -""" -hijack stdout, stdin, result -{ -'stdout': [ - . ... - ], - 'stderr': [ - ], - 'result': [ - ] -} -def wrapper(result): - print(result) - -def run(): - wrapper(drew()) - -# start-customer-code -load('blah', 'blah') -def drew(ctx): - a = 1 - b = 2 - result = blah.add(a, b) - -# end-customer-code - -run() -""" +runner.run(suite()) \ No newline at end of file From b4598f75f530bfad5b637b85385944ca416fee7b Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Wed, 24 Feb 2021 17:46:56 -0800 Subject: [PATCH 2/9] Fixes #28 --- .../security/larky/nativelib/std/Json.java | 60 ++++++++++++++++++- .../test/resources/test_loading_module.star | 11 +++- pom.xml | 22 +++++++ 3 files changed, 89 insertions(+), 4 deletions(-) diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java b/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java index f9cb2deb0..cc360c06d 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java @@ -14,8 +14,6 @@ package com.verygood.security.larky.nativelib.std; -import java.util.Arrays; -import java.util.Map; import net.starlark.java.annot.Param; import net.starlark.java.annot.StarlarkBuiltin; import net.starlark.java.annot.StarlarkMethod; @@ -31,6 +29,9 @@ import 
net.starlark.java.eval.StarlarkValue; import net.starlark.java.eval.Structure; +import java.util.Arrays; +import java.util.Map; + // Tests at //src/test/java/net/starlark/java/eval:testdata/json.sky /** @@ -43,6 +44,44 @@ doc = "Module json is a Starlark module of JSON-related functions.") public final class Json implements StarlarkValue { + //@formatter:off + private static final String _METHOD_ENCODE_DOCUMENTATION = + "

The encode function accepts one required positional argument, which it converts to" + + " JSON by cases:\n" + + "

\n" + + "An application-defined type may define its own JSON encoding.\n" + + "Encoding any other value yields an error.\n"; + //@formatter:on + + //@formatter:off + private static final String _METHOD_DECODE_DOCUMENTATION = + "The decode function accepts one positional parameter, a JSON string.\n" + + "It returns the Starlark value that the string denotes.\n" + + "\n" + + "Decoding fails if x is not a valid JSON encoding.\n"; + //@formatter:on private Json() {} /** @@ -100,6 +139,14 @@ public String encode(Object x) throws EvalException { return enc.out.toString(); } + @StarlarkMethod( + name = "dumps", + doc =_METHOD_ENCODE_DOCUMENTATION, + parameters = {@Param(name = "x")}) + public String dumps(Object x) throws EvalException { + return encode(x); + } + private static final class Encoder { private final StringBuilder out = new StringBuilder(); @@ -297,6 +344,15 @@ public Object decode(String x, StarlarkThread thread) throws EvalException { return new Decoder(thread.mutability(), x).decode(); } + @StarlarkMethod( + name = "loads", + doc =_METHOD_DECODE_DOCUMENTATION, + parameters = {@Param(name = "x")}, + useStarlarkThread = true) + public Object loads(String x, StarlarkThread thread) throws EvalException { + return decode(x, thread); + } + private static final class Decoder { // The decoder necessarily makes certain representation choices diff --git a/larky/src/test/resources/test_loading_module.star b/larky/src/test/resources/test_loading_module.star index eccfc18b0..079946927 100644 --- a/larky/src/test/resources/test_loading_module.star +++ b/larky/src/test/resources/test_loading_module.star @@ -18,5 +18,12 @@ load("testlib/builtinz", "setz", "collections") print(collections) print(setz.make()) print(hashlib.md5("foo")) -print(json.decode('{"one": 1, "two": 2}')) -print(json.decode('"\\ud83d\\ude39\\ud83d\\udc8d"')) \ No newline at end of file + +c1 = json.dumps({"one": 1, "two": 2}) +d1 = json.dumps("😹💍") +print(c1) +print(d1) +c = json.decode(c1) +d = 
json.decode(d1) +print(json.loads('{"one": 1, "two": 2}') == c) +print(json.loads('"\\ud83d\\ude39\\ud83d\\udc8d"') == d) \ No newline at end of file diff --git a/pom.xml b/pom.xml index f9131db80..e72e7d742 100644 --- a/pom.xml +++ b/pom.xml @@ -99,6 +99,25 @@ ${org.junit.version} + + com.google.re2j + re2j + ${google.re2j.version} + + + + com.google.crypto.tink + tink + ${google.crypto.tink} + + + + org.conscrypt + conscrypt-openjdk + ${org.conscrypt.version} + ${os.detected.classifier} + + javax.xml.bind @@ -168,11 +187,14 @@ 2.5.0 1.7.4 0.18 + 1.5.0 0.5.1 29.0-jre 1.1 + 1.5 1.0.1 2.3.0 + 2.5.1 20.1.0 4.13.1 3.5.13 From 7bcbe7f0e50c83ef663d274a572ed8ead57468f6 Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Thu, 25 Feb 2021 23:36:37 -0800 Subject: [PATCH 3/9] Add struct constructor enhancement and allow EvalException to propogate upwards --- .../security/larky/ModuleSupplier.java | 10 +++--- .../larky/nativelib/PythonBuiltins.java | 36 ++++++++++++++++++- .../larky/nativelib/test/UnittestModule.java | 1 + .../security/larky/parser/LarkyEvaluator.java | 10 +++--- .../security/larky/parser/LarkyScript.java | 7 ++-- .../larky/parser/ResourceContentStarFile.java | 30 ++++++++++------ larky/src/main/resources/stdlib/larky.star | 27 +++++++++++++- 7 files changed, 97 insertions(+), 24 deletions(-) diff --git a/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java b/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java index bd888c257..6b28c46e8 100644 --- a/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java +++ b/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java @@ -24,14 +24,15 @@ import com.verygood.security.larky.nativelib.PythonBuiltins; import com.verygood.security.larky.nativelib.std.C99Math; import com.verygood.security.larky.nativelib.std.Hashlib; - -import net.starlark.java.annot.StarlarkBuiltin; -import net.starlark.java.eval.StarlarkValue; import 
com.verygood.security.larky.nativelib.std.Json; import com.verygood.security.larky.nativelib.std.Proto; +import com.verygood.security.larky.nativelib.std.RE2RegexEngine; import com.verygood.security.larky.nativelib.test.LarkyAssertions; import com.verygood.security.larky.nativelib.test.UnittestModule; +import net.starlark.java.annot.StarlarkBuiltin; +import net.starlark.java.eval.StarlarkValue; + import java.util.Map; import java.util.function.Function; @@ -49,7 +50,8 @@ public class ModuleSupplier { Json.INSTANCE, Proto.INSTANCE, Hashlib.INSTANCE, - C99Math.INSTANCE + C99Math.INSTANCE, + RE2RegexEngine.INSTANCE ); public static final ImmutableSet TEST_MODULES = ImmutableSet.of( diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java b/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java index 4cf71c8dc..380878e46 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java @@ -17,7 +17,7 @@ * A work-in-progress to add methods as we need them. 
* * More here: https://docs.python.org/3/library/functions.html - * + * * */ @Library public final class PythonBuiltins { @@ -63,4 +63,38 @@ public StarlarkInt pow(StarlarkInt base, StarlarkInt exp, Object mod) throws Eva .modPow(exp.toBigInteger(), ((StarlarkInt) mod).toBigInteger()) ); } +// +// @StarlarkMethod( +// name = "bytes", +// doc = "immutable array of bytes", +// parameters = { +// @Param( +// name = "sequence", +// allowedTypes = { +// @ParamType(type = String.class), +// } +// ) +// } +// ) +// public StarlarkList bytes(String sequence) { +// byte[] bytes = sequence.getBytes(StandardCharsets.UTF_8); +// +// return StarlarkList.immutableOf(Stream.of(bytes.).map((byte[] x) -> Byte.toUnsignedInt(x)).collect(); +// } +// +// @StarlarkMethod( +// name = "chr", +// doc = "Return ascii ord", +// parameters = { +// @Param( +// name = "ordinal", +// allowedTypes = { +// @ParamType(type = StarlarkInt.class), +// } +// ) +// } +// ) +// public String chr(StarlarkInt ordinal) { +// return String.valueOf((char) ordinal.toIntUnchecked()); +// } } diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java b/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java index 2ece5bfa3..b0bbbfe72 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java @@ -63,6 +63,7 @@ public Object addTestToSuite(Object functionTestCase) { useStarlarkThread = true) public Object addFunctionUnderTest(Object function, StarlarkThread thread) { LarkyFunctionTestCase tc = new LarkyFunctionTestCase(Starlark.repr(function)); + //TODO: if this fails, we need to return a better error message. 
tc.setFunction((StarlarkFunction) function); tc.setThread(thread); return tc; diff --git a/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java b/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java index 264c58cdf..29e45684d 100644 --- a/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java +++ b/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java @@ -77,7 +77,7 @@ private void starlarkPrint(StarlarkThread thread, String msg) { } public Module eval(StarFile content) - throws IOException, InterruptedException { + throws IOException, InterruptedException, EvalException { if (pending.contains(content.path())) { throw throwCycleError(content.path()); } @@ -112,7 +112,7 @@ public Module eval(StarFile content) } public Object evalWithOutput(StarFile content) - throws IOException, InterruptedException { + throws IOException, InterruptedException, EvalException { // Make the modules available as predeclared bindings. 
StarlarkSemantics semantics = StarlarkSemantics.DEFAULT; @@ -183,7 +183,7 @@ public Module load(String moduleToLoad) { } else { loadedModule = evaluator.eval(content.resolve(moduleToLoad + LarkyScript.STAR_EXTENSION)); } - } catch (IOException | InterruptedException e) { + } catch (IOException | InterruptedException | EvalException e) { throw new RuntimeException(e); } return loadedModule; @@ -279,7 +279,7 @@ private Map processLoads(StarFile content, Program prog) { } @NotNull - private Program compileStarlarkProgram(Module module, ParserInput input, FileOptions options) { + private Program compileStarlarkProgram(Module module, ParserInput input, FileOptions options) throws EvalException { Program prog; try { prog = Program.compileFile(StarlarkFile.parse(input, options), module); @@ -289,7 +289,7 @@ private Program compileStarlarkProgram(Module module, ParserInput input, FileOpt console.error(error.toString()); errs.add(error.toString()); } - throw new RuntimeException( + throw new EvalException( String.format( "Error compiling Starlark program: %1$s%n" + "%2$s", diff --git a/larky/src/main/java/com/verygood/security/larky/parser/LarkyScript.java b/larky/src/main/java/com/verygood/security/larky/parser/LarkyScript.java index 613d2af6b..b9b16ffd2 100644 --- a/larky/src/main/java/com/verygood/security/larky/parser/LarkyScript.java +++ b/larky/src/main/java/com/verygood/security/larky/parser/LarkyScript.java @@ -27,6 +27,7 @@ import com.verygood.security.larky.ModuleSupplier.ModuleSet; import com.verygood.security.larky.console.Console; +import net.starlark.java.eval.EvalException; import net.starlark.java.eval.Module; import net.starlark.java.syntax.FileOptions; @@ -115,7 +116,7 @@ public ModuleSet getModuleSet() { @VisibleForTesting public Module executeSkylark(StarFile content, ModuleSet moduleSet, Console console) - throws IOException, InterruptedException { + throws IOException, InterruptedException, EvalException { CapturingStarFile capturingConfigFile = new 
CapturingStarFile(content); StarFilesSupplier starFilesSupplier = new StarFilesSupplier(); @@ -125,7 +126,7 @@ public Module executeSkylark(StarFile content, ModuleSet moduleSet, Console cons } public Object executeSkylarkWithOutput(StarFile content, ModuleSet moduleSet, Console console) - throws IOException, InterruptedException { + throws IOException, InterruptedException, EvalException { CapturingStarFile capturingConfigFile = new CapturingStarFile(content); StarFilesSupplier starFilesSupplier = new StarFilesSupplier(); @@ -176,7 +177,7 @@ private ParsedStarFile loadStarFileInternal(StarFile content, ModuleSet moduleSe Module module; try { module = new LarkyEvaluator(this, moduleSet, console).eval(content); - } catch (InterruptedException e) { + } catch (InterruptedException | EvalException e) { // This should not happen since we shouldn't have anything interruptable during loading. throw new RuntimeException("Internal error", e); } diff --git a/larky/src/main/java/com/verygood/security/larky/parser/ResourceContentStarFile.java b/larky/src/main/java/com/verygood/security/larky/parser/ResourceContentStarFile.java index 63a106b83..7dd0a1a40 100644 --- a/larky/src/main/java/com/verygood/security/larky/parser/ResourceContentStarFile.java +++ b/larky/src/main/java/com/verygood/security/larky/parser/ResourceContentStarFile.java @@ -1,14 +1,15 @@ package com.verygood.security.larky.parser; +import static com.verygood.security.larky.parser.LarkyEvaluator.LarkyLoader.STDLIB; + +import net.starlark.java.eval.EvalException; + import org.apache.commons.io.IOUtils; +import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; -import lombok.SneakyThrows; - -import static com.verygood.security.larky.parser.LarkyEvaluator.LarkyLoader.STDLIB; - public class ResourceContentStarFile implements StarFile { private String resourcePath; @@ -19,22 +20,31 @@ private ResourceContentStarFile(String resourcePath, byte[] content) { this.content = content; } - 
@SneakyThrows - public static ResourceContentStarFile buildStarFile(String resourcePath, InputStream inputStream) { + public static ResourceContentStarFile buildStarFile(String resourcePath, InputStream inputStream) throws IOException { return new ResourceContentStarFile(resourcePath, String.join("\n", IOUtils.readLines(inputStream, Charset.defaultCharset())).getBytes()); } - @SneakyThrows - public static ResourceContentStarFile buildStarFile(String resourcePath) { + public static ResourceContentStarFile buildStarFile(String resourcePath) throws EvalException { String resourceName = resolveResourceName(resourcePath); InputStream resourceStream = ResourceContentStarFile.class.getClassLoader().getResourceAsStream(resourceName); - return buildStarFile(resourceName, resourceStream); + if(resourceStream == null) { + throw new EvalException("Unable to find resource: " + resourceName); + } + try { + return buildStarFile(resourceName, resourceStream); + } catch (IOException e) { + throw new EvalException(e); + } } @Override public StarFile resolve(String path) { - return buildStarFile(path); + try { + return buildStarFile(path); + } catch (EvalException e) { + throw new RuntimeException(e); + } } @Override diff --git a/larky/src/main/resources/stdlib/larky.star b/larky/src/main/resources/stdlib/larky.star index d5761697e..695bd75f7 100644 --- a/larky/src/main/resources/stdlib/larky.star +++ b/larky/src/main/resources/stdlib/larky.star @@ -1,8 +1,33 @@ # For compatibility help with Python, introduced globals are going to be using # this as a namespace + +def _to_dict(s): + """Converts a `struct` to a `dict`. + Args: + s: A `struct`. + Returns: + A `dict` whose keys and values are the same as the fields in `s`. The + transformation is only applied to the struct's fields and not to any + nested values. 
+ """ + attributes = dir(s) + if "to_json" in attributes: + attributes.remove("to_json") + if "to_proto" in attributes: + attributes.remove("to_proto") + return {key: getattr(s, key) for key in attributes} + + +def _struct__init__(**kwargs): + if "to_dict" in kwargs: + kwargs.pop("to_dict")  # Starlark dicts have no remove(); drop the caller's key so ours is the only to_dict + + return _struct(to_dict=_to_dict, **kwargs) + + larky = _struct( - struct=_struct, + struct=_struct__init__, mutablestruct=_mutablestruct, partial=_partial, property=_property, From 0770bc460beb2b46d613ec881012ff6150528017 Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Thu, 25 Feb 2021 23:37:48 -0800 Subject: [PATCH 4/9] Fixes #51 --- .../larky/nativelib/std/RE2RegexEngine.java | 463 ++++++++++++++++++ larky/src/main/resources/stdlib/re.star | 184 +++++++ .../verygood/security/larky/LarkyTest.java | 6 +- .../test/resources/stdlib_tests/test_re.star | 123 +++++ 4 files changed, 773 insertions(+), 3 deletions(-) create mode 100644 larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java create mode 100644 larky/src/main/resources/stdlib/re.star create mode 100644 larky/src/test/resources/stdlib_tests/test_re.star diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java b/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java new file mode 100644 index 000000000..99b86df22 --- /dev/null +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java @@ -0,0 +1,463 @@ +package com.verygood.security.larky.nativelib.std; + +import com.google.re2j.Matcher; +import com.google.re2j.Pattern; + +import net.starlark.java.annot.Param; +import net.starlark.java.annot.ParamType; +import net.starlark.java.annot.StarlarkBuiltin; +import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.NoneType; +import net.starlark.java.eval.Starlark; +import net.starlark.java.eval.StarlarkInt; +import net.starlark.java.eval.StarlarkList; +import 
net.starlark.java.eval.StarlarkValue; + +import java.util.Arrays; + + +@StarlarkBuiltin( + name = "re2j", + category = "BUILTIN", + doc = "This module provides access to the linear regular expression matching engine.\n" + + "\n" + + "This package provides an implementation of regular expression matching based on Russ Cox's linear-time RE2 algorithm.\n" + + "\n" + + "The API presented by com.google.re2j mimics that of java.util.regex.Matcher and java.util.regex.Pattern. While not identical, they are similar enough that most users can switch implementations simply by changing their imports.\n" + + "\n" + + "The syntax of the regular expressions accepted is the same general syntax used by Perl, Python, and other languages. More precisely, it is the syntax accepted by the C++ and Go implementations of RE2 described at https://github.com/google/re2/wiki/Syntax, except for \\C (match any byte), which is not supported because in this implementation, the matcher's input is conceptually a stream of Unicode code points, not bytes.\n" + + "\n" + + "The current API is rather small and intended for compatibility with java.util.regex, but the underlying implementation supports some additional features, such as the ability to process input character streams encoded as UTF-8 byte arrays. These may be exposed in a future release if there is sufficient interest." 
+ + "\n" + + "More on syntax here: https://github.com/google/re2/wiki/Syntax") +public class RE2RegexEngine implements StarlarkValue { + + public static final RE2RegexEngine INSTANCE = new RE2RegexEngine(); + + private static final LarkyRegexPattern _Pattern = new LarkyRegexPattern(); + + @StarlarkMethod(name = "Pattern", doc = "pattern", structField = true) + public static LarkyRegexPattern Pattern() { return _Pattern; } + + // java <> larky objects + public static class LarkyRegexPattern implements StarlarkValue { + + @StarlarkMethod(name = "CASE_INSENSITIVE", doc = "Flag: case insensitive matching.", structField = true) + public StarlarkInt CASE_INSENSITIVE() { return StarlarkInt.of(Pattern.CASE_INSENSITIVE); } + + @StarlarkMethod(name = "DISABLE_UNICODE_GROUPS", doc = "Flag: Unicode groups (e.g. \\p\\ Greek\\ ) will be syntax errors", structField = true) + public StarlarkInt DISABLE_UNICODE_GROUPS() { return StarlarkInt.of(Pattern.DISABLE_UNICODE_GROUPS); } + + @StarlarkMethod(name = "DOTALL", doc = "Flag: dot (.) matches all characters, including newline.", structField = true) + public StarlarkInt DOTALL() { return StarlarkInt.of(Pattern.DOTALL); } + + @StarlarkMethod(name = "LONGEST_MATCH", doc = "Flag: matches longest possible string.", structField = true) + public StarlarkInt LONGEST_MATCH() { return StarlarkInt.of(Pattern.LONGEST_MATCH); } + + @StarlarkMethod(name = "MULTILINE", doc = "Flag: multiline matching: ^ and $ match at beginning and end of line, not just beginning and end of input.", structField = true) + public StarlarkInt MULTILINE() { return StarlarkInt.of(Pattern.MULTILINE); } + + private Pattern pattern; + + protected LarkyRegexPattern pattern(Pattern pattern) { + this.pattern = pattern; + return this; + } + + @StarlarkMethod( + name = "compile", + doc = "Creates and returns a new Pattern corresponding to compiling regex with the given flags." 
+ + "If flags is not passed, it defaults to 0", + parameters = { + @Param(name = "regex"), + @Param( + name = "flags", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + }, + defaultValue = "0") + }) + public static LarkyRegexPattern compile(String regex, StarlarkInt flags) { + return new LarkyRegexPattern() + .pattern(Pattern.compile(regex, flags.toIntUnchecked())); + } + + @StarlarkMethod( + name = "matches", + doc = "Matches a string against a regular expression.", + parameters = { + @Param(name = "regex"), + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + }) + }) + public static boolean matches(String regex, String input) { + return Pattern.matches(regex, input); + } + + @StarlarkMethod( + name = "quote", + doc = "", + parameters = { + @Param( + name = "s", + allowedTypes = { + @ParamType(type = String.class), + }) + }) + public static String quote(String s) { + return Pattern.quote(s); + } + + @StarlarkMethod( + name = "flags", + doc = "" + ) + public StarlarkInt flags() { + return StarlarkInt.of(pattern.flags()); + } + + @StarlarkMethod(name="pattern", doc="") + public String pattern() { + return pattern.pattern(); + } + + @StarlarkMethod( + name = "matcher", + doc = "Creates a new Matcher matching the pattern against the input.\n", + parameters = { + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + }) + }) + public LarkyRegexMatcher matcher(String input) { + return new LarkyRegexMatcher(pattern.matcher(input), this); + } + + @StarlarkMethod( + name = "split", + doc = "", + parameters = { + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + }), + @Param( + name = "limit", + allowedTypes = { + @ParamType(type = StarlarkInt.class) + }, + defaultValue = "0" + ) + }) + public StarlarkList split(String input, StarlarkInt limit) { + return StarlarkList.immutableCopyOf( + Arrays.asList(pattern.split(input, limit.toIntUnchecked())) + ); + } + + @StarlarkMethod( 
+ name = "group_count", + doc = "Returns the number of subgroups in this pattern.\n" + + "the number of subgroups; the overall match (group 0) does not count\n" + ) + public StarlarkInt groupCount() { + return StarlarkInt.of(pattern.groupCount()); + } + + } + + public static class LarkyRegexMatcher implements StarlarkValue { + private final Matcher matcher; + private final LarkyRegexPattern pattern; + + LarkyRegexMatcher(Matcher matcher) { + this.matcher = matcher; + this.pattern = new LarkyRegexPattern().pattern(matcher.pattern()); + } + + LarkyRegexMatcher(Matcher matcher, LarkyRegexPattern pattern) { + this.matcher = matcher; + this.pattern = pattern; + } + + @StarlarkMethod( + name = "pattern", + doc = "Returns the LarkyRegexPattern associated with this LarkyRegexMatcher.\n" + ) + public LarkyRegexPattern pattern() { + return pattern; + } + + @StarlarkMethod( + name = "reset", + doc = "Resets the LarkyRegexMatcher, rewinding input and discarding any match information.\n", + parameters = { + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + @ParamType(type = NoneType.class) + }, + defaultValue = "None" + ) + } + ) + public LarkyRegexMatcher reset(Object input) { + if(NoneType.class.isAssignableFrom(input.getClass())) { + matcher.reset(); + } + else if(String.class.isAssignableFrom(input.getClass())) { + matcher.reset(String.valueOf(input)); + } + return this; + } + + @StarlarkMethod( + name = "start", + doc = "Returns the start position of the most recent match." + + "\n" + + "Accepts a group index position, or defaults to 0 if it's the overall match.", + parameters = { + @Param( + name = "index", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + }, + defaultValue = "0" + ) + } + ) + public StarlarkInt start(StarlarkInt index) { + return StarlarkInt.of(matcher.start(index.toIntUnchecked())); + } + @StarlarkMethod( + name = "end", + doc = "Returns the end position of the most recent match." 
+ + "\n" + + "Accepts a group index position, or defaults to 0 if it's the overall match.", + parameters = { + @Param( + name = "index", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + }, + defaultValue = "0" + ) + } + ) + public StarlarkInt end(StarlarkInt index) { + return StarlarkInt.of(matcher.end(index.toIntUnchecked())); + } + + @StarlarkMethod( + name = "group", + doc = "Returns the most recent match." + + "\n" + + "If no argument or None is passed in, returns the most recent match, or " + + "null if the group was not matched." + + "\n" + + "If a valid integer is returned, returns the subgroup of the most recent match." + + "\n" + + "Throws an exception if group < 0 or group > group_count()", + parameters = { + @Param( + name = "group", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + @ParamType(type = String.class), + @ParamType(type = NoneType.class), + }, + defaultValue = "None") + }) + public Object group(Object group) { + String g; + if(NoneType.class.isAssignableFrom(group.getClass())) { + g = matcher.group(); + } + else if(StarlarkInt.class.isAssignableFrom(group.getClass())) { + g = matcher.group(((StarlarkInt)group).toIntUnchecked()); + } + // default case + else { + g = matcher.group(String.valueOf(group)); + } + + if(g == null) + return Starlark.NONE; + return g; + + } + @StarlarkMethod( + name = "group_count", + doc = "Returns the number of subgroups in this pattern.\n" + + "the number of subgroups; the overall match (group 0) does not count\n" + ) + public StarlarkInt groupCount() { + return StarlarkInt.of(matcher.groupCount()); + } + + @StarlarkMethod( + name = "matches", + doc = "Matches the entire input against the pattern (anchored start and end). 
" + + "If there is a match, matches sets the match state to describe it.\n" + + "the number of subgroups; the overall match (group 0) does not count\n" + + "\n" + + "Returns: true if the entire input matches the pattern" + ) + public boolean matches() { + return matcher.matches(); + } + + @StarlarkMethod( + name = "looking_at", + doc = "Matches the beginning of input against the pattern (anchored start). " + + "If there is a match, looking_at sets the match state to describe it." + + "\n" + + "Returns true if the beginning of the input matches the pattern\n" + ) + public boolean lookingAt() { + return matcher.lookingAt(); + } + + @StarlarkMethod( + name = "find", + doc = "Matches the input against the pattern (unanchored), starting at a specified position." + + " If there is a match, find sets the match state to describe it." + + "\n" + + "start - the input position where the search begins\n" + + "\n" + + "Returns true if it finds a match or throw if start is not a valid input position\n", + parameters = { + @Param( + name = "start", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + }, + defaultValue = "0" + ) + } + ) + public boolean find(StarlarkInt start) { + return matcher.find(start.toIntUnchecked()); + } + + @StarlarkMethod( + name="quote_replacement", + doc = "Quotes '\\' and '$' in s, so that the returned string could be used in " + + "append_replacement(appendable_string, s) as a literal replacement of s.\n" + + "\n" + + "Returns: the quoted string", + parameters = { + @Param( + name = "s", + allowedTypes = { + @ParamType(type = String.class), + } + ) + } + ) + public static String quoteReplacement(String s) { + return Matcher.quoteReplacement(s); + } + + @StarlarkMethod( + name="append_replacement", + doc = "Appends to sb two strings: the text from the append position up to the " + + "beginning of the most recent match, and then the replacement with submatch groups" + + " substituted for references of the form $n, where n is the group number in 
decimal" + + ". It advances the append position to where the most recent match ended." + + "\n" + + "To embed a literal $, use \\$ (actually \"\\\\$\" with string escapes). The " + + "escape is only necessary when $ is followed by a digit, but it is always allowed. " + + "Only $ and \\ need escaping, but any character can be escaped." + + "\n" + + "\n" + + "The group number n in $n is always at least one digit and expands to use more " + + "digits as long as the resulting number is a valid group number for this pattern. " + + "To cut it off earlier, escape the first digit that should not be used." + + "\n" + + "Returns: the Matcher itself, for chained method calls\n", + parameters = { + @Param( + name = "sb", + allowedTypes = { + @ParamType(type = String.class), + } + ), + @Param( + name = "replacement", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public LarkyRegexMatcher appendReplacement(String sb, String replacement) { + return new LarkyRegexMatcher( + matcher + .appendReplacement( + new StringBuilder().append(sb), + replacement)); + } + + @StarlarkMethod( + name="append_tail", + doc = "Appends to sb the substring of the input from the append position to the " + + "end of the input." + + "\n" + + "Returns the argument sb, for method chaining\n", + parameters = { + @Param( + name = "s", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public String appendTail(String s) { + return matcher.appendTail(new StringBuilder().append(s)).toString(); + } + + @StarlarkMethod( + name="replace_all", + doc = "Returns the input with all matches replaced by replacement, interpreted as for" + + " append_replacement." 
+ + "\n" + + "The input string with the matches replaced\n", + parameters = { + @Param( + name = "replacement", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public String replaceAll(String replacement) { + return matcher.replaceAll(replacement); + } + + @StarlarkMethod( + name="replace_first", + doc = "Returns the input with the first match replaced by replacement, " + + "interpreted as for append_replacement.\n" + + "\n" + + "The input string with the first matches replaced\n", + parameters = { + @Param( + name = "replacement", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public String replaceFirst(String replacement) { + return matcher.replaceFirst(replacement); + } + + } +} diff --git a/larky/src/main/resources/stdlib/re.star b/larky/src/main/resources/stdlib/re.star new file mode 100644 index 000000000..06c1721a6 --- /dev/null +++ b/larky/src/main/resources/stdlib/re.star @@ -0,0 +1,184 @@ +""" +""" +load("@stdlib/larky", "larky") +load("@stdlib/re2j", _re2j = "re2j") + + +def _enumify_iterable(iterable, enum_dict): + """A hacky function to turn an iterable into a dict with whose keys are the + members of the iterable, and value is the index.""" + for i, t in enumerate(iterable): + enum_dict[t] = i + return enum_dict + + +__ = -1 # Alias for the invalid class +RegexFlags = _enumify_iterable(iterable = [ + "A", + "ASCII", + "DEBUG", + "I", + "IGNORECASE", + "L", + "LOCALE", + "M", + "MULTILINE", + "S", + "DOTALL", + "X", + "VERBOSE", + "U", + "UNICODE", + "T", + "TEMPLATE", +], enum_dict = {'__' : __}) + + +# emulate class object +def _matcher__init__(matchobj): + + def group(*args): + if len(args) <= 1: + return matchobj.group(*args) + else: + m = [] + for i in args: + m.append(matchobj.group(i)) + return tuple(m) + + def groups(): + m = [] + for i in range(matchobj.group_count()): + m.append(matchobj.group(i+1)) + return tuple(m) + + return larky.struct( + group=group, + groups=groups + ) + +# 
-------------------------------------------------------------------- +# public interface + +def _match(pattern, string, flags=0): + """Try to apply the pattern at the start of the string, returning + a Match object, or None if no match was found.""" + _matcher = _compile(pattern, flags).matcher(string) + if not _matcher.looking_at(): + return None + return _matcher__init__(_matcher) + +def _fullmatch(pattern, string, flags=0): + """Try to apply the pattern to all of the string, returning + a Match object, or None if no match was found.""" + _matcher = _compile(pattern, flags).matcher(string) + if not _matcher.matches(): + return None + return _matcher__init__(_matcher) + +def _search(pattern, string, flags=0): + """Scan through string looking for a match to the pattern, returning + a Match object, or None if no match was found.""" + _matcher = _compile(pattern, flags).matcher(string) + if not _matcher.find(): + return None + return _matcher__init__(_matcher) + +def _sub(pattern, repl, string, count=0, flags=0): + """Return the string obtained by replacing the leftmost + non-overlapping occurrences of the pattern in string by the + replacement repl. repl can be either a string or a callable; + if a string, backslash escapes in it are processed. If it is + a callable, it's passed the Match object and must return + a replacement string to be used.""" + return _compile(pattern, flags).sub(repl, string, count) + +def _subn(pattern, repl, string, count=0, flags=0): + """Return a 2-tuple containing (new_string, number). + new_string is the string obtained by replacing the leftmost + non-overlapping occurrences of the pattern in the source + string by the replacement repl. number is the number of + substitutions that were made. repl can be either a string or a + callable; if a string, backslash escapes in it are processed. 
+ If it is a callable, it's passed the Match object and must + return a replacement string to be used.""" + return _compile(pattern, flags).subn(repl, string, count) + +def _split(pattern, string, maxsplit=0, flags=0): + """Split the source string by the occurrences of the pattern, + returning a list containing the resulting substrings. If + capturing parentheses are used in pattern, then the text of all + groups in the pattern are also returned as part of the resulting + list. If maxsplit is nonzero, at most maxsplit splits occur, + and the remainder of the string is returned as the final element + of the list.""" + return _compile(pattern, flags).split(string, maxsplit) + +def _findall(pattern, string, flags=0): + """Return a list of all non-overlapping matches in the string. + If one or more capturing groups are present in the pattern, return + a list of groups; this will be a list of tuples if the pattern + has more than one group. + Empty matches are included in the result.""" + return _compile(pattern, flags).findall(string) + +def _finditer(pattern, string, flags=0): + """Return an iterator over all non-overlapping matches in the + string. For each match, the iterator returns a Match object. + Empty matches are included in the result.""" + return _compile(pattern, flags).finditer(string) + +def _compile(pattern, flags=0): + "Compile a regular expression pattern, returning a Pattern object." 
+ pattern = _re2j.Pattern.compile(pattern, flags) + return pattern + +def _purge(): + "Clear the regular expression caches" + pass + +def _template(pattern, flags=0): + "Compile a template pattern, returning a Pattern object" + #return _compile(pattern, flags|T) + pass + +# SPECIAL_CHARS +# closing ')', '}' and ']' +# '-' (a range in character set) +# '&', '~', (extended character set operations) +# '#' (comment) and WHITESPACE (ignored) in verbose mode +#_special_chars_map = {i: '\\' + chr(i) for i in bytes('()[]{}?*+-|^$\\.&~# \t\n\r')} + +def _escape(pattern): + """ + Escape special characters in a string. + """ + res = "" + for c in pattern.elems(): + if any(( + (('0' <= c) and (c <= '9')), + (('A' <= c) and (c <= 'Z')), + (('a' <= c) and (c <= 'z')), + c == '_', + )): + res += c + else: + res += "\\" + c + return res + #return pattern.translate(_special_chars_map) + + +re = larky.struct( + compile = _compile, + search = _search, + match = _match, + fullmatch = _fullmatch, + split = _split, + findall = _findall, + finditer = _finditer, + sub = _sub, + subn = _subn, + escape = _escape, + purge = _purge, + template = _template, +) \ No newline at end of file diff --git a/larky/src/test/java/com/verygood/security/larky/LarkyTest.java b/larky/src/test/java/com/verygood/security/larky/LarkyTest.java index 18c68e18b..38d678c16 100644 --- a/larky/src/test/java/com/verygood/security/larky/LarkyTest.java +++ b/larky/src/test/java/com/verygood/security/larky/LarkyTest.java @@ -1,5 +1,7 @@ package com.verygood.security.larky; +import static com.verygood.security.larky.ModuleSupplier.CORE_MODULES; + import com.google.common.collect.ImmutableSet; import com.verygood.security.larky.console.testing.TestingConsole; @@ -23,8 +25,6 @@ import java.nio.file.Paths; import java.util.stream.Stream; -import static com.verygood.security.larky.ModuleSupplier.CORE_MODULES; - public class LarkyTest { @Test @@ -74,7 +74,7 @@ public void testStdLib() throws IOException { //.filter(f -> 
f.getFileName().startsWith("test_") && f.endsWith(".star")) .filter(f -> { String fileName = f.getFileName().toString(); - return fileName.startsWith("test_") && fileName.endsWith(".star"); + return fileName.startsWith("test_re") && fileName.endsWith(".star"); }) .forEach(f -> { try { diff --git a/larky/src/test/resources/stdlib_tests/test_re.star b/larky/src/test/resources/stdlib_tests/test_re.star new file mode 100644 index 000000000..4d83cb83e --- /dev/null +++ b/larky/src/test/resources/stdlib_tests/test_re.star @@ -0,0 +1,123 @@ +"""Unit tests for re.star""" + +load("@stdlib/asserts", "asserts") +load("@stdlib/unittest", "unittest") +load("@stdlib/re", "re") + + +def _test_escape(): + asserts.assert_that(re.escape(r"1243*&[]_dsfAd")).is_equal_to(r"1243\*\&\[\]_dsfAd") + + +# search +def _test_search(): + m = re.search(r"a+", "caaab") + asserts.assert_that(m.group(0)).is_equal_to("aaa") + asserts.assert_that(m.group()).is_equal_to("aaa") + +# match +def _test_match(): + m = re.match(r"(?ms)foo.*", "foo\nbar") + asserts.assert_that(m.group(0)).is_equal_to("foo\nbar") + + asserts.assert_that(re.match(r"a+", "caaab")).is_none() + m = re.match(r"a+", "aaaab") + asserts.assert_that(m.group(0)).is_equal_to("aaaa") + +def _test_groups(): + m = re.match(r"(\d+)\.(\d+)", "24.1632") + asserts.assert_that(m.groups()).is_equal_to(('24', '1632')) + asserts.assert_that(m.group(2, 1)).is_equal_to(('1632', '24')) + + m = re.match("(b)|(:+)", ":a") + asserts.assert_that(m.groups()).is_equal_to((None, ":")) +# +# # sub +# +# assert re.sub("a", "z", "caaab") == "czzzb" +# assert re.sub("a+", "z", "caaab") == "czb" +# assert re.sub("a", "z", "caaab", 1) == "czaab" +# assert re.sub("a", "z", "caaab", 2) == "czzab" +# assert re.sub("a", "z", "caaab", 10) == "czzzb" +# assert re.sub(r"[ :/?&]", "_", "http://foo.ua/bar/?a=1&b=baz/") == "http___foo.ua_bar__a=1_b=baz_" +# assert re.sub("a", lambda m: m.group(0) * 2, "caaab") == "caaaaaab" +# +# # subn +# +# assert re.subn("b*", "x", 
"xyz") == ('xxxyxzx', 4) +# +# # zero-length matches +# assert re.sub('(?m)^(?!$)', '--', 'foo') == '--foo' +# assert re.sub('(?m)^(?!$)', '--', 'foo\n') == '--foo\n' +# assert re.sub('(?m)^(?!$)', '--', 'foo\na') == '--foo\n--a' +# assert re.sub('(?m)^(?!$)', '--', 'foo\n\na') == '--foo\n\n--a' +# assert re.sub('(?m)^(?!$)', '--', 'foo\n\na', 1) == '--foo\n\na' +# assert re.sub('(?m)^(?!$)', '--', 'foo\n \na', 2) == '--foo\n-- \na' +# +# # split +# +# assert re.split('x*', 'foo') == ['foo'] +# assert re.split("(?m)^$", "foo\n\nbar\n") == ["foo\n\nbar\n"] +# assert re.split('\W+', 'Words, words, words.') == ['Words', 'words', 'words', ''] +# assert re.split('(\W+)', 'Words, words, words.') == ['Words', ', ', 'words', ', ', 'words', '.', ''] +# assert re.split('\W+', 'Words, words, words.', 1) == ['Words', 'words, words.'] +# assert re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE) == ['0', '3', '9'] +# assert re.split('(\W+)', '...words, words...') == ['', '...', 'words', ', ', 'words', '...', ''] +# assert re.split("(b)|(:+)", ":abc") == ['', None, ':', 'a', 'b', None, 'c'] +# +# # findall +# +# text = "He was carefully disguised but captured quickly by police." +# assert re.findall(r"\w+ly", text) == ['carefully', 'quickly'] +# +# text = "He was carefully disguised but captured quickly by police." +# assert re.findall(r"(\w+)(ly)", text) == [('careful', 'ly'), ('quick', 'ly')] +# +# text = "He was carefully disguised but captured quickly by police." +# assert re.findall(r"(\w+)ly", text) == ['careful', 'quick'] +# +# r = re.compile(r"\w+ly") +# text = "carefully disguised but captured quickly by police." +# assert r.findall(text, 1) == ['arefully', 'quickly'] +# +# _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) +# text = "\tfoo\n\tbar" +# indents = _leading_whitespace_re.findall(text) +# assert indents == ['\t', '\t'] +# +# text = " \thello there\n \t how are you?" 
+# indents = _leading_whitespace_re.findall(text) +# assert indents == [' \t', ' \t '] +# +# assert re.findall(r"\b", "a") == ['', ''] +# +# # handling of empty matches +# indent_re = re.compile('^([ ]*)(?=\S)', re.MULTILINE) +# s = "line number one\nline number two" +# assert indent_re.findall(s) == ['', ''] +# +# # finditer +# # based on CPython's test_re.py +# iter = re.finditer(r":+", "a:b::c:::d") +# assert [item.group(0) for item in iter] == [":", "::", ":::"] +# +# pat = re.compile(r":+") +# iter = pat.finditer("a:b::c:::d", 3, 8) +# assert [item.group(0) for item in iter] == ["::", "::"] +# +# s = "line one\nline two\n 3" +# iter = re.finditer(r"^ *", s, re.MULTILINE) +# assert [m.group() for m in iter] == ["", "", " "] +# +# assert [m.group() for m in re.finditer(r".*", "asdf")] == ["asdf", ""] + +def _suite(): + _suite = unittest.TestSuite() + _suite.addTest(unittest.FunctionTestCase(_test_escape)) + _suite.addTest(unittest.FunctionTestCase(_test_search)) + _suite.addTest(unittest.FunctionTestCase(_test_match)) + _suite.addTest(unittest.FunctionTestCase(_test_groups)) + return _suite + +_runner = unittest.TextTestRunner() +_runner.run(_suite()) \ No newline at end of file From ad603b25094f1b02dda0c557c291b82de5510464 Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Fri, 26 Feb 2021 22:18:00 -0800 Subject: [PATCH 5/9] Updated larky to use re2.split() to match the python re module --- .../security/larky/nativelib/README.md | 15 ++ .../larky/nativelib/std/RE2RegexEngine.java | 138 ++++++++++-- larky/src/main/resources/stdlib/re.star | 183 +++++++++++---- larky/src/main/resources/stdlib/types.star | 32 +++ .../test/resources/stdlib_tests/test_re.star | 212 +++++++++++------- pom.xml | 5 + 6 files changed, 440 insertions(+), 145 deletions(-) diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/README.md b/larky/src/main/java/com/verygood/security/larky/nativelib/README.md index 6ef22c57c..3a16a420e 100644 --- 
a/larky/src/main/java/com/verygood/security/larky/nativelib/README.md +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/README.md @@ -10,3 +10,18 @@ In order to ensure that Larky is compatible with Python (besides the obvious `lo As a result, globals should not be accessed directly. Instead, access Larky native functions and methods via the [`Larky` stdlib namespace](https://github.com/verygoodsecurity/starlarky/blob/master/larky/src/main/resources/stdlib/larky.star). Again, Do not access these libraries directly, but access them through Larky StdLib via the [`larky` namespace](https://github.com/verygoodsecurity/starlarky/blob/master/larky/src/main/resources/stdlib/larky.star). +### How does one emulate a while loop? +```python + while pos <= finish: + # do stuff +``` + +emulate it by: + +```python + for _while_ in range(1000): # "while pos <= finish" is the same as: + if pos > finish: # for _while_ in range(xxx): + break # if pos > finish: break +``` + +Obviously, range can take a larger number to emulate infinity. 
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java b/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java index 99b86df22..23f5fd550 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java @@ -1,18 +1,23 @@ package com.verygood.security.larky.nativelib.std; +import com.google.common.base.Joiner; import com.google.re2j.Matcher; import com.google.re2j.Pattern; +import com.verygood.security.larky.parser.StarlarkUtil; + import net.starlark.java.annot.Param; import net.starlark.java.annot.ParamType; import net.starlark.java.annot.StarlarkBuiltin; import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.EvalException; import net.starlark.java.eval.NoneType; import net.starlark.java.eval.Starlark; import net.starlark.java.eval.StarlarkInt; import net.starlark.java.eval.StarlarkList; import net.starlark.java.eval.StarlarkValue; +import java.util.ArrayList; import java.util.Arrays; @@ -78,8 +83,8 @@ protected LarkyRegexPattern pattern(Pattern pattern) { defaultValue = "0") }) public static LarkyRegexPattern compile(String regex, StarlarkInt flags) { - return new LarkyRegexPattern() - .pattern(Pattern.compile(regex, flags.toIntUnchecked())); + int flag = flags.toIntUnchecked(); + return new LarkyRegexPattern().pattern(Pattern.compile(regex, flag)); } @StarlarkMethod( @@ -155,10 +160,97 @@ public LarkyRegexMatcher matcher(String input) { defaultValue = "0" ) }) - public StarlarkList split(String input, StarlarkInt limit) { - return StarlarkList.immutableCopyOf( - Arrays.asList(pattern.split(input, limit.toIntUnchecked())) - ); + public StarlarkList split(String input, StarlarkInt limit) { + Object[] strings = _py_re_split_impl(input, limit.toIntUnchecked()); + //String[] strSplit = pattern.split(input, _limit); + return 
StarlarkList.immutableCopyOf(Arrays.asList(strings)); + } + + private String[] _jdk_split_impl(CharSequence input, int limit) { + ArrayList matchList = new ArrayList<>(); + Matcher m = pattern.matcher(input); + + int index = 0; + boolean matchLimited = limit > 0; + // Add segments before each match found + while (m.find()) { + if (!matchLimited || matchList.size() < limit - 1) { + if (index == 0 && index == m.start() && m.start() == m.end()) { + // no empty leading substring included for zero-width match + // at the beginning of the input char sequence. + continue; + } + String match = input.subSequence(index, m.start()).toString(); + matchList.add(match); + index = m.end(); + } else if (matchList.size() == limit - 1) { // last one + String match = input.subSequence(index, + input.length()).toString(); + matchList.add(match); + index = m.end(); + + } + } + // If no match was found, return this + if (index == 0) { + return new String[]{input.toString()}; + } + if (!matchLimited || matchList.size() < limit) { + // Add remaining segment + matchList.add(input.subSequence(index, input.length()).toString()); + } + // Construct result + int resultSize = matchList.size(); + if (limit == 0) { + while (resultSize > 0 && matchList.get(resultSize - 1).equals("")) { + resultSize--; + } + } + String[] result = new String[resultSize]; + return matchList.subList(0, resultSize).toArray(result); + } + + private Object[] _py_re_split_impl(CharSequence input, int limit) { + Matcher m = pattern.matcher(input); + ArrayList matchList = new ArrayList<>(); + boolean matchLimited = limit > 0; + boolean has_capture = m.groupCount() > 0; + int index = 0; + String match; + + while(m.find()) { + if (!matchLimited || matchList.size() <= limit - 1) { + match = input.subSequence(index, m.start()).toString(); + matchList.add(match); + index = m.end(); + } else if (matchList.size() == limit - 1) { // last one + match = input.subSequence(index, + input.length()).toString(); + matchList.add(match); + 
index = m.end(); + } + if(has_capture) { + // Check if there's capture groups and add them + for(int i = 0; i < m.groupCount(); ++i) { + match = m.group(i+1); + matchList.add(match == null ? Starlark.NONE : match); + } + } + } + + // If no match was found, return this + if (index == 0) { + return new String[] {input.toString()}; + } + // NOTE: If maxsplit is nonzero, at most maxsplit splits occur, + // and the remainder of the string is returned as the final + // element of the list. + if (!matchLimited || matchList.size() <= limit) { + // Add remaining segment + matchList.add(input.subSequence(index, input.length()).toString()); + } + + return matchList.toArray(new Object[0]); } @StarlarkMethod( @@ -277,7 +369,7 @@ public StarlarkInt end(StarlarkInt index) { }) public Object group(Object group) { String g; - if(NoneType.class.isAssignableFrom(group.getClass())) { + if(Starlark.isNullOrNone(group)) { g = matcher.group(); } else if(StarlarkInt.class.isAssignableFrom(group.getClass())) { @@ -288,8 +380,9 @@ else if(StarlarkInt.class.isAssignableFrom(group.getClass())) { g = matcher.group(String.valueOf(group)); } - if(g == null) + if(g == null) { return Starlark.NONE; + } return g; } @@ -338,13 +431,18 @@ public boolean lookingAt() { name = "start", allowedTypes = { @ParamType(type = StarlarkInt.class), + @ParamType(type = NoneType.class), }, - defaultValue = "0" + defaultValue = "None" ) } ) - public boolean find(StarlarkInt start) { - return matcher.find(start.toIntUnchecked()); + public boolean find(Object start) { + if(Starlark.isNullOrNone(start)) { + return matcher.find(); + } + StarlarkInt s = (StarlarkInt) StarlarkUtil.valueToStarlark(start); + return matcher.find(s.toIntUnchecked()); } @StarlarkMethod( @@ -387,7 +485,7 @@ public static String quoteReplacement(String s) { @Param( name = "sb", allowedTypes = { - @ParamType(type = String.class), + @ParamType(type = StarlarkList.class), } ), @Param( @@ -397,12 +495,16 @@ public static String 
quoteReplacement(String s) { } )} ) - public LarkyRegexMatcher appendReplacement(String sb, String replacement) { - return new LarkyRegexMatcher( - matcher - .appendReplacement( - new StringBuilder().append(sb), - replacement)); + public LarkyRegexMatcher appendReplacement(StarlarkList sb, String replacement) { + StringBuilder builder = new StringBuilder().append(Joiner.on("").join(sb)); + matcher.appendReplacement(builder, replacement); + try { + sb.clearElements(); + sb.addElements(Arrays.asList(builder.toString().split(""))); + } catch (EvalException e) { + throw new RuntimeException(e); + } + return this; } @StarlarkMethod( diff --git a/larky/src/main/resources/stdlib/re.star b/larky/src/main/resources/stdlib/re.star index 06c1721a6..4b4d1bff6 100644 --- a/larky/src/main/resources/stdlib/re.star +++ b/larky/src/main/resources/stdlib/re.star @@ -1,42 +1,51 @@ """ """ load("@stdlib/larky", "larky") -load("@stdlib/re2j", _re2j = "re2j") +load("@stdlib/types", "types") +load("@stdlib/re2j", _re2j="re2j") -def _enumify_iterable(iterable, enum_dict): +def _enumify_iterable(iterable, enum_dict, numerator=None): """A hacky function to turn an iterable into a dict with whose keys are the - members of the iterable, and value is the index.""" + members of the iterable, and value is the index. + + If the key is a tuple, it will iterate over the keys and assign the same + enumerated position. + + A numerator is a callable that takes the enumerated position and returns + the expected number in order. 
For example, numerator=lambda x: x << 2 will + map to 1, 2, 4, 8, 16 instead of 1, 2, 3, 4, 5 + + + """ for i, t in enumerate(iterable): - enum_dict[t] = i + _i = i + if numerator and types.is_callable(numerator): + _i = numerator(i) + if types.is_tuple(t): + for t_elem in t: + enum_dict[t_elem] = _i + else: + enum_dict[t] = _i return enum_dict -__ = -1 # Alias for the invalid class -RegexFlags = _enumify_iterable(iterable = [ - "A", - "ASCII", +__ = -1 # Alias for the invalid class +RegexFlags = _enumify_iterable(iterable=[ + ("I", "IGNORECASE"), + ("S", "DOTALL"), + ("M", "MULTILINE"), + ("U", "UNICODE"), + "LONGEST_MATCH", + ("A", "ASCII"), "DEBUG", - "I", - "IGNORECASE", - "L", - "LOCALE", - "M", - "MULTILINE", - "S", - "DOTALL", - "X", - "VERBOSE", - "U", - "UNICODE", - "T", - "TEMPLATE", -], enum_dict = {'__' : __}) - + ("L", "LOCALE"), + ("X", "VERBOSE"), + ("T", "TEMPLATE"), +], enum_dict={'__': __}, numerator=lambda x: 1 << x) # emulate class object def _matcher__init__(matchobj): - def group(*args): if len(args) <= 1: return matchobj.group(*args) @@ -49,7 +58,7 @@ def _matcher__init__(matchobj): def groups(): m = [] for i in range(matchobj.group_count()): - m.append(matchobj.group(i+1)) + m.append(matchobj.group(i + 1)) return tuple(m) return larky.struct( @@ -57,9 +66,11 @@ def _matcher__init__(matchobj): groups=groups ) + # -------------------------------------------------------------------- # public interface + def _match(pattern, string, flags=0): """Try to apply the pattern at the start of the string, returning a Match object, or None if no match was found.""" @@ -68,6 +79,7 @@ def _match(pattern, string, flags=0): return None return _matcher__init__(_matcher) + def _fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning a Match object, or None if no match was found.""" @@ -76,6 +88,7 @@ def _fullmatch(pattern, string, flags=0): return None return _matcher__init__(_matcher) + def _search(pattern, string, 
flags=0): """Scan through string looking for a match to the pattern, returning a Match object, or None if no match was found.""" @@ -84,6 +97,7 @@ def _search(pattern, string, flags=0): return None return _matcher__init__(_matcher) + def _sub(pattern, repl, string, count=0, flags=0): """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the @@ -91,7 +105,9 @@ def _sub(pattern, repl, string, count=0, flags=0): if a string, backslash escapes in it are processed. If it is a callable, it's passed the Match object and must return a replacement string to be used.""" - return _compile(pattern, flags).sub(repl, string, count) + new_string, _number = _subn(pattern, repl, string, count, flags) + return new_string + def _subn(pattern, repl, string, count=0, flags=0): """Return a 2-tuple containing (new_string, number). @@ -102,7 +118,71 @@ def _subn(pattern, repl, string, count=0, flags=0): callable; if a string, backslash escapes in it are processed. 
If it is a callable, it's passed the Match object and must return a replacement string to be used.""" - return _compile(pattern, flags).subn(repl, string, count) + # print("replacing:", string, "matching:", pattern, "with:", repl) + return _native_subn(pattern, string, repl, count, flags) + + +_WHILE_LOOP_EMULATION_ITERATION = 50 + + +def _native_subn(pattern, string, repl, count=0, flags=0): + _matcher = _compile(pattern, flags).matcher(string) + res = [] + cnt_rpl = 0 + + for _i in range(_WHILE_LOOP_EMULATION_ITERATION): + if not _matcher.find(): + break + _repl = repl + if types.is_callable(repl): + _repl = repl(_matcher) + _matcher.append_replacement(res, _repl) + cnt_rpl += 1 + if count != 0: + count -= 1 + if count == 0: + break + return _matcher.append_tail("".join(res)), cnt_rpl + + +def _larky_subn(pattern, s, repl, count=0, flags=0): + res = [] + pos = 0 + cnt_rpl = 0 + finish = len(s) + m = _compile(pattern, flags).matcher(s) + + for _while_ in range(_WHILE_LOOP_EMULATION_ITERATION): + if pos > finish: + break + + if not m.find(): + res.append(s[pos:]) + break + beg, end = m.start(), m.end() + res.append(s[pos:beg]) + if types.is_callable(repl): + res.append(repl(m)) + elif "\\" in repl: + res.append(m.quote_replacement(repl)) + else: + res.append(repl) + cnt_rpl += 1 + + pos = end + if beg == end: + # Have progress on empty matches + res.append(s[pos:pos + 1]) + pos += 1 + + if count != 0: + count -= 1 + if count == 0: + res.append(s[pos:]) + break + + return ''.join(res), cnt_rpl + def _split(pattern, string, maxsplit=0, flags=0): """Split the source string by the occurrences of the pattern, @@ -114,6 +194,7 @@ def _split(pattern, string, maxsplit=0, flags=0): of the list.""" return _compile(pattern, flags).split(string, maxsplit) + def _findall(pattern, string, flags=0): """Return a list of all non-overlapping matches in the string. 
If one or more capturing groups are present in the pattern, return @@ -122,32 +203,37 @@ def _findall(pattern, string, flags=0): Empty matches are included in the result.""" return _compile(pattern, flags).findall(string) + def _finditer(pattern, string, flags=0): """Return an iterator over all non-overlapping matches in the string. For each match, the iterator returns a Match object. Empty matches are included in the result.""" return _compile(pattern, flags).finditer(string) + def _compile(pattern, flags=0): "Compile a regular expression pattern, returning a Pattern object." pattern = _re2j.Pattern.compile(pattern, flags) return pattern + def _purge(): "Clear the regular expression caches" pass + def _template(pattern, flags=0): "Compile a template pattern, returning a Pattern object" - #return _compile(pattern, flags|T) + # return _compile(pattern, flags|T) pass + # SPECIAL_CHARS # closing ')', '}' and ']' # '-' (a range in character set) # '&', '~', (extended character set operations) # '#' (comment) and WHITESPACE (ignored) in verbose mode -#_special_chars_map = {i: '\\' + chr(i) for i in bytes('()[]{}?*+-|^$\\.&~# \t\n\r')} +# _special_chars_map = {i: '\\' + chr(i) for i in bytes('()[]{}?*+-|^$\\.&~# \t\n\r')} def _escape(pattern): """ @@ -156,29 +242,30 @@ def _escape(pattern): res = "" for c in pattern.elems(): if any(( - (('0' <= c) and (c <= '9')), - (('A' <= c) and (c <= 'Z')), - (('a' <= c) and (c <= 'z')), - c == '_', + (('0' <= c) and (c <= '9')), + (('A' <= c) and (c <= 'Z')), + (('a' <= c) and (c <= 'z')), + c == '_', )): res += c else: res += "\\" + c return res - #return pattern.translate(_special_chars_map) + # return pattern.translate(_special_chars_map) re = larky.struct( - compile = _compile, - search = _search, - match = _match, - fullmatch = _fullmatch, - split = _split, - findall = _findall, - finditer = _finditer, - sub = _sub, - subn = _subn, - escape = _escape, - purge = _purge, - template = _template, -) \ No newline at end of file + 
compile=_compile, + search=_search, + match=_match, + fullmatch=_fullmatch, + split=_split, + findall=_findall, + finditer=_finditer, + sub=_sub, + subn=_subn, + escape=_escape, + purge=_purge, + template=_template, + **RegexFlags +) diff --git a/larky/src/main/resources/stdlib/types.star b/larky/src/main/resources/stdlib/types.star index 6522f1547..466b3f768 100644 --- a/larky/src/main/resources/stdlib/types.star +++ b/larky/src/main/resources/stdlib/types.star @@ -28,6 +28,10 @@ def _a_function(): _a_function_type = type(_a_function) + +_a_lambda_type = type(lambda x: 1) + + def _is_list(v): """Returns True if v is an instance of a list. @@ -105,6 +109,7 @@ def _is_dict(v): """ return type(v) == _a_dict_type + def _is_function(v): """Returns True if v is an instance of a function. @@ -116,6 +121,31 @@ def _is_function(v): """ return type(v) == _a_function_type + +def _is_lambda(v): + """Returns True if v is an instance of a lambda. + + Args: + v: The value whose type should be checked. + + Returns: + True if v is an instance of a lambda, False otherwise. + """ + return type(v) == _a_lambda_type + + +def _is_callable(v): + """Returns True if v is a callable: an instance of a function or a lambda + + Args: + v: The value whose type should be checked. + + Returns: + True if v is an instance of a callable, False otherwise. + """ + return _is_function(v) or _is_lambda(v) + + def _is_set(v): """Returns True if v is a set created by sets.make(). 
@@ -384,6 +414,8 @@ types = larky.struct( is_tuple = _is_tuple, is_dict = _is_dict, is_function = _is_function, + is_lambda = _is_lambda, + is_callable = _is_callable, is_set = _is_set, is_instance = _is_instance, MethodType = _MethodType, diff --git a/larky/src/test/resources/stdlib_tests/test_re.star b/larky/src/test/resources/stdlib_tests/test_re.star index 4d83cb83e..45f68bb05 100644 --- a/larky/src/test/resources/stdlib_tests/test_re.star +++ b/larky/src/test/resources/stdlib_tests/test_re.star @@ -15,6 +15,7 @@ def _test_search(): asserts.assert_that(m.group(0)).is_equal_to("aaa") asserts.assert_that(m.group()).is_equal_to("aaa") + # match def _test_match(): m = re.match(r"(?ms)foo.*", "foo\nbar") @@ -24,6 +25,7 @@ def _test_match(): m = re.match(r"a+", "aaaab") asserts.assert_that(m.group(0)).is_equal_to("aaaa") + def _test_groups(): m = re.match(r"(\d+)\.(\d+)", "24.1632") asserts.assert_that(m.groups()).is_equal_to(('24', '1632')) @@ -31,85 +33,130 @@ def _test_groups(): m = re.match("(b)|(:+)", ":a") asserts.assert_that(m.groups()).is_equal_to((None, ":")) -# -# # sub -# -# assert re.sub("a", "z", "caaab") == "czzzb" -# assert re.sub("a+", "z", "caaab") == "czb" -# assert re.sub("a", "z", "caaab", 1) == "czaab" -# assert re.sub("a", "z", "caaab", 2) == "czzab" -# assert re.sub("a", "z", "caaab", 10) == "czzzb" -# assert re.sub(r"[ :/?&]", "_", "http://foo.ua/bar/?a=1&b=baz/") == "http___foo.ua_bar__a=1_b=baz_" -# assert re.sub("a", lambda m: m.group(0) * 2, "caaab") == "caaaaaab" -# -# # subn -# -# assert re.subn("b*", "x", "xyz") == ('xxxyxzx', 4) -# -# # zero-length matches -# assert re.sub('(?m)^(?!$)', '--', 'foo') == '--foo' -# assert re.sub('(?m)^(?!$)', '--', 'foo\n') == '--foo\n' -# assert re.sub('(?m)^(?!$)', '--', 'foo\na') == '--foo\n--a' -# assert re.sub('(?m)^(?!$)', '--', 'foo\n\na') == '--foo\n\n--a' -# assert re.sub('(?m)^(?!$)', '--', 'foo\n\na', 1) == '--foo\n\na' -# assert re.sub('(?m)^(?!$)', '--', 'foo\n \na', 2) == '--foo\n-- \na' -# 
-# # split -# -# assert re.split('x*', 'foo') == ['foo'] -# assert re.split("(?m)^$", "foo\n\nbar\n") == ["foo\n\nbar\n"] -# assert re.split('\W+', 'Words, words, words.') == ['Words', 'words', 'words', ''] -# assert re.split('(\W+)', 'Words, words, words.') == ['Words', ', ', 'words', ', ', 'words', '.', ''] -# assert re.split('\W+', 'Words, words, words.', 1) == ['Words', 'words, words.'] -# assert re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE) == ['0', '3', '9'] -# assert re.split('(\W+)', '...words, words...') == ['', '...', 'words', ', ', 'words', '...', ''] -# assert re.split("(b)|(:+)", ":abc") == ['', None, ':', 'a', 'b', None, 'c'] -# -# # findall -# -# text = "He was carefully disguised but captured quickly by police." -# assert re.findall(r"\w+ly", text) == ['carefully', 'quickly'] -# -# text = "He was carefully disguised but captured quickly by police." -# assert re.findall(r"(\w+)(ly)", text) == [('careful', 'ly'), ('quick', 'ly')] -# -# text = "He was carefully disguised but captured quickly by police." -# assert re.findall(r"(\w+)ly", text) == ['careful', 'quick'] -# -# r = re.compile(r"\w+ly") -# text = "carefully disguised but captured quickly by police." -# assert r.findall(text, 1) == ['arefully', 'quickly'] -# -# _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) -# text = "\tfoo\n\tbar" -# indents = _leading_whitespace_re.findall(text) -# assert indents == ['\t', '\t'] -# -# text = " \thello there\n \t how are you?" 
-# indents = _leading_whitespace_re.findall(text) -# assert indents == [' \t', ' \t '] -# -# assert re.findall(r"\b", "a") == ['', ''] -# -# # handling of empty matches -# indent_re = re.compile('^([ ]*)(?=\S)', re.MULTILINE) -# s = "line number one\nline number two" -# assert indent_re.findall(s) == ['', ''] -# -# # finditer -# # based on CPython's test_re.py -# iter = re.finditer(r":+", "a:b::c:::d") -# assert [item.group(0) for item in iter] == [":", "::", ":::"] -# -# pat = re.compile(r":+") -# iter = pat.finditer("a:b::c:::d", 3, 8) -# assert [item.group(0) for item in iter] == ["::", "::"] -# -# s = "line one\nline two\n 3" -# iter = re.finditer(r"^ *", s, re.MULTILINE) -# assert [m.group() for m in iter] == ["", "", " "] -# -# assert [m.group() for m in re.finditer(r".*", "asdf")] == ["asdf", ""] + + +# sub +def _test_sub(): + asserts.assert_that(re.sub("a", "z", "caaab")).is_equal_to("czzzb") + asserts.assert_that(re.sub("a+", "z", "caaab")).is_equal_to("czb") + asserts.assert_that(re.sub("a", "z", "caaab", 1)).is_equal_to("czaab") + asserts.assert_that(re.sub("a", "z", "caaab", 2)).is_equal_to("czzab") + asserts.assert_that(re.sub("a", "z", "caaab", 10)).is_equal_to("czzzb") + asserts.assert_that(re.sub(r"[ :/?&]", "_", "http://foo.ua/bar/?a=1&b=baz/")).is_equal_to("http___foo.ua_bar__a=1_b=baz_") + asserts.assert_that(re.sub("a", lambda m: m.group(0) * 2, "caaab")).is_equal_to("caaaaaab") + + +# subn +def _test_subn(): + asserts.assert_that(re.subn("b*", "x", "xyz")).is_equal_to(('xxxyxzx', 4)) + + +# zero-length matches +def _test_zero_length_matches(): + # currently not supported! + # you could try (?:$|[^,]) as an alternative to (?!,). 
+ asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo')).is_equal_to('--foo') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n')).is_equal_to('--foo\n') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\na')).is_equal_to('--foo\n--a') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n\na')).is_equal_to('--foo\n\n--a') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n\na', 1)).is_equal_to('--foo\n\na') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n \na', 2)).is_equal_to('--foo\n-- \na') + + +# split +def _test_split(): + asserts.assert_that(re.split('x*', 'foo')).is_equal_to(['', 'f', 'o', 'o', '']) + asserts.assert_that(re.split("(?m)^$", "foo\n\nbar\n")).is_equal_to(['foo\n', '\nbar\n', '']) + asserts.assert_that(re.split(r'\W+', 'Words, words, words.')).is_equal_to(['Words', 'words', 'words', '']) + asserts.assert_that(re.split(r'(\W+)', 'Words, words, words.')).is_equal_to(['Words', ', ', 'words', ', ', 'words', '.', '']) + asserts.assert_that(re.split(r'\W+', 'Words, words, words.', 1)).is_equal_to(['Words', 'words, words.']) + asserts.assert_that(re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)).is_equal_to(['0', '3', '9']) + asserts.assert_that(re.split(r'(\W+)', '...words, words...')).is_equal_to(['', '...', 'words', ', ', 'words', '...', '']) + asserts.assert_that(re.split("(b)|(:+)", ":abc")).is_equal_to(['', None, ':', 'a', 'b', None, 'c']) + # for string in ":a:b::c", S(":a:b::c"): + # self.assertTypedEqual(re.split(":", string), + # ['', 'a', 'b', '', 'c']) + # self.assertTypedEqual(re.split(":+", string), + # ['', 'a', 'b', 'c']) + # self.assertTypedEqual(re.split("(:+)", string), + # ['', ':', 'a', ':', 'b', '::', 'c']) + # for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"), + # memoryview(b":a:b::c")): + # self.assertTypedEqual(re.split(b":", string), + # [b'', b'a', b'b', b'', b'c']) + # self.assertTypedEqual(re.split(b":+", string), + # [b'', b'a', b'b', b'c']) + # 
self.assertTypedEqual(re.split(b"(:+)", string), + # [b'', b':', b'a', b':', b'b', b'::', b'c']) + # for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432", + # "\U0001d49c\U0001d49e\U0001d4b5"): + # string = ":%s:%s::%s" % (a, b, c) + # self.assertEqual(re.split(":", string), ['', a, b, '', c]) + # self.assertEqual(re.split(":+", string), ['', a, b, c]) + # self.assertEqual(re.split("(:+)", string), + # ['', ':', a, ':', b, '::', c]) + # + # self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c']) + # self.assertEqual(re.split("(:)+", ":a:b::c"), + # ['', ':', 'a', ':', 'b', ':', 'c']) + # self.assertEqual(re.split("([b:]+)", ":a:b::c"), + # ['', ':', 'a', ':b::', 'c']) + # self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), + # ['', None, ':', 'a', None, ':', '', 'b', None, '', + # None, '::', 'c']) + # self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), + # ['', 'a', '', '', 'c']) + + + +# findall +def _test_findall(): + text = "He was carefully disguised but captured quickly by police." + asserts.assert_that(re.findall(r"\w+ly", text)).is_equal_to(['carefully', 'quickly']) + + text = "He was carefully disguised but captured quickly by police." + asserts.assert_that(re.findall(r"(\w+)(ly)", text)).is_equal_to([('careful', 'ly'), ('quick', 'ly')]) + + text = "He was carefully disguised but captured quickly by police." + asserts.assert_that(re.findall(r"(\w+)ly", text)).is_equal_to(['careful', 'quick']) + + r = re.compile(r"\w+ly") + text = "carefully disguised but captured quickly by police." + asserts.assert_that(r.findall(text, 1)).is_equal_to(['arefully', 'quickly']) + + _leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE) + text = "\tfoo\n\tbar" + indents = _leading_whitespace_re.findall(text) + asserts.assert_that(indents).is_equal_to(['\t', '\t']) + + text = " \thello there\n \t how are you?" 
+ indents = _leading_whitespace_re.findall(text) + asserts.assert_that(indents).is_equal_to([' \t', ' \t ']) + + asserts.assert_that(re.findall(r"\b", "a")).is_equal_to(['', '']) + + # handling of empty matches + indent_re = re.compile(r'^([ ]*)(?=\S)', re.MULTILINE) + s = "line number one\nline number two" + asserts.assert_that(indent_re.findall(s)).is_equal_to(['', '']) + + +# finditer +def _test_finditer(): + # based on CPython's test_re.py + iter = re.finditer(r":+", "a:b::c:::d") + asserts.assert_that([item.group(0) for item in iter]).is_equal_to([":", "::", ":::"]) + + pat = re.compile(r":+") + iter = pat.finditer("a:b::c:::d", 3, 8) + asserts.assert_that([item.group(0) for item in iter]).is_equal_to(["::", "::"]) + + s = "line one\nline two\n 3" + iter = re.finditer(r"^ *", s, re.MULTILINE) + asserts.assert_that([m.group() for m in iter]).is_equal_to(["", "", " "]) + + asserts.assert_that([m.group() for m in re.finditer(r".*", "asdf")]).is_equal_to(["asdf", ""]) + def _suite(): _suite = unittest.TestSuite() @@ -117,6 +164,13 @@ def _suite(): _suite.addTest(unittest.FunctionTestCase(_test_search)) _suite.addTest(unittest.FunctionTestCase(_test_match)) _suite.addTest(unittest.FunctionTestCase(_test_groups)) + _suite.addTest(unittest.FunctionTestCase(_test_sub)) + _suite.addTest(unittest.FunctionTestCase(_test_subn)) + # currently not supported! 
+ #_suite.addTest(unittest.FunctionTestCase(_test_zero_length_matches)) + _suite.addTest(unittest.FunctionTestCase(_test_split)) + _suite.addTest(unittest.FunctionTestCase(_test_findall)) + _suite.addTest(unittest.FunctionTestCase(_test_finditer)) return _suite _runner = unittest.TextTestRunner() diff --git a/pom.xml b/pom.xml index e72e7d742..cfc0f2711 100644 --- a/pom.xml +++ b/pom.xml @@ -169,6 +169,11 @@ maven-source-plugin ${maven.source.plugin.version} + + + + + From c669d1bbe96c456185bab83348b0967d2b1e8e66 Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Fri, 26 Feb 2021 23:47:46 -0800 Subject: [PATCH 6/9] wrap regular expression in namedtuple --- .../larky/nativelib/std/RE2RegexEngine.java | 18 ++- larky/src/main/resources/stdlib/re.star | 122 ++++++++++++++++-- 2 files changed, 130 insertions(+), 10 deletions(-) diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java b/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java index 23f5fd550..df786780f 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java @@ -162,7 +162,6 @@ public LarkyRegexMatcher matcher(String input) { }) public StarlarkList split(String input, StarlarkInt limit) { Object[] strings = _py_re_split_impl(input, limit.toIntUnchecked()); - //String[] strSplit = pattern.split(input, _limit); return StarlarkList.immutableCopyOf(Arrays.asList(strings)); } @@ -262,6 +261,23 @@ public StarlarkInt groupCount() { return StarlarkInt.of(pattern.groupCount()); } +// @StarlarkMethod( +// name = "findall", +// doc = "Return a list of all non-overlapping matches in the string.\n" + +// "\n" + +// "If one or more capturing groups are present in the pattern, return\n" + +// "a list of groups; this will be a list of tuples if the pattern\n" + +// "has more than one group.\n" + +// "\n" + +// "Empty matches are 
included in the result.", +// parameters = { +// @Param(name = "input", allowedTypes = {@ParamType(type = String.class)}) +// } +// ) +// public StarlarkList findall(String input) { +// +// } + } public static class LarkyRegexMatcher implements StarlarkValue { diff --git a/larky/src/main/resources/stdlib/re.star b/larky/src/main/resources/stdlib/re.star index 4b4d1bff6..e860f9487 100644 --- a/larky/src/main/resources/stdlib/re.star +++ b/larky/src/main/resources/stdlib/re.star @@ -61,12 +61,67 @@ def _matcher__init__(matchobj): m.append(matchobj.group(i + 1)) return tuple(m) + + return larky.struct( group=group, - groups=groups + groups=groups, + find=matchobj.find, + pattern=matchobj.pattern, + start=matchobj.start, + end=matchobj.end, + group_count=matchobj.group_count, + matches=matchobj.matches, + looking_at=matchobj.looking_at, + replace_first=matchobj.replace_first, + replace_all=matchobj.replace_all, + append_tail=matchobj.append_tail, + append_replacement=matchobj.append_replacement, + quote_replacement=matchobj.quote_replacement ) +def _pattern__init__(patternobj): + + def search(string, flags=0): + return _search(patternobj.pattern(), string, flags) + + def match(string, flags=0): + return _match(patternobj.pattern(), string, flags) + + def matcher(string): + return _matcher__init__(patternobj.matcher(string)) + + def fullmatch(string, flags=0): + return _fullmatch(patternobj.pattern(), string, flags) + + def sub(repl, string, count=0, flags=0): + return _sub(patternobj.pattern(), repl, string, count, flags) + + def subn(repl, string, count=0, flags=0): + return _subn(patternobj.pattern(), repl, string, count, flags) + + def split(string, maxsplit=0, flags=0): + return _split(patternobj.pattern(), string, maxsplit, flags) + + def findall(string, flags=0): + return _findall(patternobj.pattern(), string, flags) + + def finditer(string, flags=0): + return _finditer(patternobj.pattern(), string, flags) + + return larky.struct( + search=search, + match=match, 
+ fullmatch=fullmatch, + sub=sub, + subn=subn, + findall=findall, + finditer=finditer, + matcher=matcher, + split=split, + patternobj=patternobj + ) # -------------------------------------------------------------------- # public interface @@ -77,7 +132,7 @@ def _match(pattern, string, flags=0): _matcher = _compile(pattern, flags).matcher(string) if not _matcher.looking_at(): return None - return _matcher__init__(_matcher) + return _matcher def _fullmatch(pattern, string, flags=0): @@ -86,7 +141,7 @@ def _fullmatch(pattern, string, flags=0): _matcher = _compile(pattern, flags).matcher(string) if not _matcher.matches(): return None - return _matcher__init__(_matcher) + return _matcher def _search(pattern, string, flags=0): @@ -95,7 +150,7 @@ def _search(pattern, string, flags=0): _matcher = _compile(pattern, flags).matcher(string) if not _matcher.find(): return None - return _matcher__init__(_matcher) + return _matcher def _sub(pattern, repl, string, count=0, flags=0): @@ -192,29 +247,78 @@ def _split(pattern, string, maxsplit=0, flags=0): list. If maxsplit is nonzero, at most maxsplit splits occur, and the remainder of the string is returned as the final element of the list.""" - return _compile(pattern, flags).split(string, maxsplit) + return _compile(pattern, flags).patternobj.split(string, maxsplit) -def _findall(pattern, string, flags=0): +def _findall(pattern, s, flags=0): """Return a list of all non-overlapping matches in the string. If one or more capturing groups are present in the pattern, return a list of groups; this will be a list of tuples if the pattern has more than one group. 
Empty matches are included in the result.""" - return _compile(pattern, flags).findall(string) + res = [] + m = _compile(pattern, flags).matcher(s) + + pos = 0 + finish = len(s) + for _while_ in range(_WHILE_LOOP_EMULATION_ITERATION): + if pos > finish: + break + if not m.find(pos): + break + + print("---> ", m.group(), ":::", m.group_count()) + num = m.group_count() + if num == 0: + res.append(m.group()) + elif num == 1: + res.append(m.group(num)) + else: + res.append(tuple([m.group(_i+1) for _i in range(num)])) + + print(res) + beg, end = m.start(), m.end() + pos = end + if beg == end: + # Have progress on empty matches + pos += 1 + + for i in range(len(res)): + x = res[i] + if types.is_tuple(x): + res[i] = tuple(["%s" % x1 for x1 in x]) + else: + res[i] = "%s" % x + return res def _finditer(pattern, string, flags=0): """Return an iterator over all non-overlapping matches in the string. For each match, the iterator returns a Match object. Empty matches are included in the result.""" - return _compile(pattern, flags).finditer(string) + pass +# +# def finditer(self, s, pos=0, endpos=-1): +# if endpos != -1: +# s = s[:endpos] +# res = [] +# finish = len(s) +# while pos <= finish: +# m = self.search(s, pos) +# if not m: +# break +# yield m +# beg, end = m.span(0) +# pos = end +# if beg == end: +# # Have progress on empty matches +# pos += 1 def _compile(pattern, flags=0): "Compile a regular expression pattern, returning a Pattern object." 
pattern = _re2j.Pattern.compile(pattern, flags) - return pattern + return _pattern__init__(pattern) def _purge(): From 176914a571946f5a2c7a2b403c6b49e783b44efa Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Sat, 27 Feb 2021 16:53:34 -0800 Subject: [PATCH 7/9] regular expression tests fully working --- larky/src/main/resources/stdlib/re.star | 350 ++++++++++-------- .../verygood/security/larky/LarkyTest.java | 2 +- .../test/resources/stdlib_tests/test_re.star | 146 +++++--- 3 files changed, 286 insertions(+), 212 deletions(-) diff --git a/larky/src/main/resources/stdlib/re.star b/larky/src/main/resources/stdlib/re.star index e860f9487..bc3e7d099 100644 --- a/larky/src/main/resources/stdlib/re.star +++ b/larky/src/main/resources/stdlib/re.star @@ -1,10 +1,41 @@ """ +Emulates python's re module but using Google's re2. More on the syntax and + what is allowed and what is not here: + + https://github.com/google/re2/wiki/Syntax + +Java's standard regular expression package, java.util.regex, and many other +widely used regular expression packages such as PCRE, Perl and Python use a +backtracking implementation strategy: when a pattern presents two alternatives +such as a|b, the engine will try to match subpattern a first, and if that yields +no match, it will reset the input stream and try to match b instead. + +If such choices are deeply nested, this strategy requires an exponential number +of passes over the input data before it can detect whether the input matches. If +the input is large, it is easy to construct a pattern whose running time would +exceed the lifetime of the universe. This creates a security risk when accepting +regular expression patterns from untrusted sources, such as users of a web +application. + +In contrast, the RE2 algorithm explores all matches simultaneously in a single +pass over the input data by using a nondeterministic finite automaton. 
+ +There are certain features of PCRE or Perl regular expressions that cannot be +implemented in linear time, for example, backreferences, but the vast majority +of regular expressions patterns in practice avoid such features. + +A good portion of `findall` and `finditer` code was ported from: +pfalcon's pycopy-lib located at: + https://github.com/pfalcon/pycopy-lib/tree/master/re-pcre """ load("@stdlib/larky", "larky") load("@stdlib/types", "types") load("@stdlib/re2j", _re2j="re2j") +_WHILE_LOOP_EMULATION_ITERATION = 50 + + def _enumify_iterable(iterable, enum_dict, numerator=None): """A hacky function to turn an iterable into a dict with whose keys are the members of the iterable, and value is the index. @@ -44,8 +75,10 @@ RegexFlags = _enumify_iterable(iterable=[ ("T", "TEMPLATE"), ], enum_dict={'__': __}, numerator=lambda x: 1 << x) + # emulate class object def _matcher__init__(matchobj): + def group(*args): if len(args) <= 1: return matchobj.group(*args) @@ -61,8 +94,6 @@ def _matcher__init__(matchobj): m.append(matchobj.group(i + 1)) return tuple(m) - - return larky.struct( group=group, groups=groups, @@ -83,32 +114,159 @@ def _matcher__init__(matchobj): def _pattern__init__(patternobj): - def search(string, flags=0): - return _search(patternobj.pattern(), string, flags) - - def match(string, flags=0): - return _match(patternobj.pattern(), string, flags) - def matcher(string): return _matcher__init__(patternobj.matcher(string)) - def fullmatch(string, flags=0): - return _fullmatch(patternobj.pattern(), string, flags) + def match(string, pos=0, endpos=-1): + m = matcher(string) + if not m.looking_at(): + return None + return m - def sub(repl, string, count=0, flags=0): - return _sub(patternobj.pattern(), repl, string, count, flags) + def fullmatch(string, pos=0, endpos=-1): + m = matcher(string) + if not m.matches(): + return None + return m + + def search(string, pos=0, endpos=-1): + m = matcher(string) + if not m.find(): + return None + return m - def 
subn(repl, string, count=0, flags=0): - return _subn(patternobj.pattern(), repl, string, count, flags) + def sub(repl, string, count=0): + new_string, _number = subn(repl, string, count) + return new_string + + def subn(repl, string, count=0): + return _native_subn(repl, string, count) + + def _native_subn(repl, string, count=0): + _matcher = matcher(string) + res = [] + cnt_rpl = 0 + + for _i in range(_WHILE_LOOP_EMULATION_ITERATION): + if not _matcher.find(): + break + _repl = repl + if types.is_callable(repl): + _repl = repl(_matcher) + _matcher.append_replacement(res, _repl) + cnt_rpl += 1 + if count != 0: + count -= 1 + if count == 0: + break + return _matcher.append_tail("".join(res)), cnt_rpl + + def _larky_subn(repl, s, count=0): + res = [] + pos = 0 + cnt_rpl = 0 + finish = len(s) + m = matcher(s) + + for _while_ in range(_WHILE_LOOP_EMULATION_ITERATION): + if pos > finish: + break + + if not m.find(): + res.append(s[pos:]) + break + beg, end = m.start(), m.end() + res.append(s[pos:beg]) + if types.is_callable(repl): + res.append(repl(m)) + elif "\\" in repl: + res.append(m.quote_replacement(repl)) + else: + res.append(repl) + cnt_rpl += 1 + + pos = end + if beg == end: + # Have progress on empty matches + res.append(s[pos:pos + 1]) + pos += 1 + + if count != 0: + count -= 1 + if count == 0: + res.append(s[pos:]) + break + + return ''.join(res), cnt_rpl + + # def split(string, maxsplit=0): + # return patternobj.split(string, maxsplit) + + def findall(s, pos=0, endpos=-1): + if endpos != -1: + s = s[:endpos] + + res = [] + finish = len(s) + m = matcher(s) + + for _while_ in range(_WHILE_LOOP_EMULATION_ITERATION): + if pos > finish: + break + if not m.find(pos): + break - def split(string, maxsplit=0, flags=0): - return _split(patternobj.pattern(), string, maxsplit, flags) + #print("---> ", m.group(), ":::", m.group_count()) + num = m.group_count() + if num == 0: + res.append(m.group()) + elif num == 1: + res.append(m.group(num)) + else: + 
res.append(tuple([m.group(_i+1) for _i in range(num)])) + + #print(res) + beg, end = m.start(), m.end() + pos = end + if beg == end: + # Have progress on empty matches + pos += 1 + + for i in range(len(res)): + x = res[i] + if types.is_tuple(x): + res[i] = tuple(["%s" % x1 for x1 in x]) + else: + res[i] = "%s" % x + return res + + def finditer(string, pos=0, endpos=-1): + # no generator/yield in starlark + if endpos != -1: + string = string[:endpos] + + res = [] + finish = len(string) + m = matcher(string) + + for _while_ in range(_WHILE_LOOP_EMULATION_ITERATION): + if pos > finish: + break + if not m.find(pos): + break + # copy matcher + set it to the position of the match + clone = matcher(string) + clone.find(pos) - def findall(string, flags=0): - return _findall(patternobj.pattern(), string, flags) + # return the matched object + res.append(clone) + beg, end = m.start(), m.end() + pos = end + if beg == end: + # Have progress on empty matches + pos += 1 + return res - def finditer(string, flags=0): - return _finditer(patternobj.pattern(), string, flags) return larky.struct( search=search, @@ -119,7 +277,7 @@ def _pattern__init__(patternobj): findall=findall, finditer=finditer, matcher=matcher, - split=split, + split=patternobj.split, patternobj=patternobj ) # -------------------------------------------------------------------- @@ -129,28 +287,22 @@ def _pattern__init__(patternobj): def _match(pattern, string, flags=0): """Try to apply the pattern at the start of the string, returning a Match object, or None if no match was found.""" - _matcher = _compile(pattern, flags).matcher(string) - if not _matcher.looking_at(): - return None - return _matcher + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.match(string) def _fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning a Match object, or None if no match was found.""" - _matcher = _compile(pattern, flags).matcher(string) - if not _matcher.matches(): - return 
None - return _matcher + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.fullmatch(string) def _search(pattern, string, flags=0): """Scan through string looking for a match to the pattern, returning a Match object, or None if no match was found.""" - _matcher = _compile(pattern, flags).matcher(string) - if not _matcher.find(): - return None - return _matcher + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.search(string) def _sub(pattern, repl, string, count=0, flags=0): @@ -160,8 +312,10 @@ def _sub(pattern, repl, string, count=0, flags=0): if a string, backslash escapes in it are processed. If it is a callable, it's passed the Match object and must return a replacement string to be used.""" - new_string, _number = _subn(pattern, repl, string, count, flags) - return new_string + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.sub(repl, string, count) + # new_string, _number = _subn(pattern, repl, string, count, flags) + # return new_string def _subn(pattern, repl, string, count=0, flags=0): @@ -174,69 +328,9 @@ def _subn(pattern, repl, string, count=0, flags=0): If it is a callable, it's passed the Match object and must return a replacement string to be used.""" # print("replacing:", string, "matching:", pattern, "with:", repl) - return _native_subn(pattern, string, repl, count, flags) - - -_WHILE_LOOP_EMULATION_ITERATION = 50 - - -def _native_subn(pattern, string, repl, count=0, flags=0): - _matcher = _compile(pattern, flags).matcher(string) - res = [] - cnt_rpl = 0 - - for _i in range(_WHILE_LOOP_EMULATION_ITERATION): - if not _matcher.find(): - break - _repl = repl - if types.is_callable(repl): - _repl = repl(_matcher) - _matcher.append_replacement(res, _repl) - cnt_rpl += 1 - if count != 0: - count -= 1 - if count == 0: - break - return _matcher.append_tail("".join(res)), cnt_rpl - - -def _larky_subn(pattern, s, repl, count=0, flags=0): - res = [] - pos = 0 - cnt_rpl = 0 - finish = len(s) - m = _compile(pattern, 
flags).matcher(s) - - for _while_ in range(_WHILE_LOOP_EMULATION_ITERATION): - if pos > finish: - break - - if not m.find(): - res.append(s[pos:]) - break - beg, end = m.start(), m.end() - res.append(s[pos:beg]) - if types.is_callable(repl): - res.append(repl(m)) - elif "\\" in repl: - res.append(m.quote_replacement(repl)) - else: - res.append(repl) - cnt_rpl += 1 - - pos = end - if beg == end: - # Have progress on empty matches - res.append(s[pos:pos + 1]) - pos += 1 - - if count != 0: - count -= 1 - if count == 0: - res.append(s[pos:]) - break - - return ''.join(res), cnt_rpl + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.subn(repl, string, count) + #return _native_subn(pattern, string, repl, count, flags) def _split(pattern, string, maxsplit=0, flags=0): @@ -247,72 +341,26 @@ def _split(pattern, string, maxsplit=0, flags=0): list. If maxsplit is nonzero, at most maxsplit splits occur, and the remainder of the string is returned as the final element of the list.""" - return _compile(pattern, flags).patternobj.split(string, maxsplit) + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.split(string, maxsplit) -def _findall(pattern, s, flags=0): +def _findall(pattern, string, flags=0): """Return a list of all non-overlapping matches in the string. If one or more capturing groups are present in the pattern, return a list of groups; this will be a list of tuples if the pattern has more than one group. 
Empty matches are included in the result.""" - res = [] - m = _compile(pattern, flags).matcher(s) - - pos = 0 - finish = len(s) - for _while_ in range(_WHILE_LOOP_EMULATION_ITERATION): - if pos > finish: - break - if not m.find(pos): - break - - print("---> ", m.group(), ":::", m.group_count()) - num = m.group_count() - if num == 0: - res.append(m.group()) - elif num == 1: - res.append(m.group(num)) - else: - res.append(tuple([m.group(_i+1) for _i in range(num)])) - - print(res) - beg, end = m.start(), m.end() - pos = end - if beg == end: - # Have progress on empty matches - pos += 1 - - for i in range(len(res)): - x = res[i] - if types.is_tuple(x): - res[i] = tuple(["%s" % x1 for x1 in x]) - else: - res[i] = "%s" % x - return res + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.findall(string) def _finditer(pattern, string, flags=0): """Return an iterator over all non-overlapping matches in the string. For each match, the iterator returns a Match object. Empty matches are included in the result.""" - pass -# -# def finditer(self, s, pos=0, endpos=-1): -# if endpos != -1: -# s = s[:endpos] -# res = [] -# finish = len(s) -# while pos <= finish: -# m = self.search(s, pos) -# if not m: -# break -# yield m -# beg, end = m.span(0) -# pos = end -# if beg == end: -# # Have progress on empty matches -# pos += 1 + _rx_pattern = _compile(pattern, flags) + return _rx_pattern.finditer(string) def _compile(pattern, flags=0): diff --git a/larky/src/test/java/com/verygood/security/larky/LarkyTest.java b/larky/src/test/java/com/verygood/security/larky/LarkyTest.java index 38d678c16..f867acf5d 100644 --- a/larky/src/test/java/com/verygood/security/larky/LarkyTest.java +++ b/larky/src/test/java/com/verygood/security/larky/LarkyTest.java @@ -74,7 +74,7 @@ public void testStdLib() throws IOException { //.filter(f -> f.getFileName().startsWith("test_") && f.endsWith(".star")) .filter(f -> { String fileName = f.getFileName().toString(); - return 
fileName.startsWith("test_re") && fileName.endsWith(".star"); + return fileName.startsWith("test_") && fileName.endsWith(".star"); }) .forEach(f -> { try { diff --git a/larky/src/test/resources/stdlib_tests/test_re.star b/larky/src/test/resources/stdlib_tests/test_re.star index 45f68bb05..3ab6fcc33 100644 --- a/larky/src/test/resources/stdlib_tests/test_re.star +++ b/larky/src/test/resources/stdlib_tests/test_re.star @@ -6,7 +6,8 @@ load("@stdlib/re", "re") def _test_escape(): - asserts.assert_that(re.escape(r"1243*&[]_dsfAd")).is_equal_to(r"1243\*\&\[\]_dsfAd") + asserts.assert_that(re.escape(r"1243*&[]_dsfAd")).is_equal_to( + r"1243\*\&\[\]_dsfAd") # search @@ -42,8 +43,11 @@ def _test_sub(): asserts.assert_that(re.sub("a", "z", "caaab", 1)).is_equal_to("czaab") asserts.assert_that(re.sub("a", "z", "caaab", 2)).is_equal_to("czzab") asserts.assert_that(re.sub("a", "z", "caaab", 10)).is_equal_to("czzzb") - asserts.assert_that(re.sub(r"[ :/?&]", "_", "http://foo.ua/bar/?a=1&b=baz/")).is_equal_to("http___foo.ua_bar__a=1_b=baz_") - asserts.assert_that(re.sub("a", lambda m: m.group(0) * 2, "caaab")).is_equal_to("caaaaaab") + asserts.assert_that( + re.sub(r"[ :/?&]", "_", "http://foo.ua/bar/?a=1&b=baz/")).is_equal_to( + "http___foo.ua_bar__a=1_b=baz_") + asserts.assert_that( + re.sub("a", lambda m: m.group(0) * 2, "caaab")).is_equal_to("caaaaaab") # subn @@ -55,70 +59,87 @@ def _test_subn(): def _test_zero_length_matches(): # currently not supported! # you could try (?:$|[^,]) as an alternative to (?!,). 
- asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo')).is_equal_to('--foo') - asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n')).is_equal_to('--foo\n') - asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\na')).is_equal_to('--foo\n--a') - asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n\na')).is_equal_to('--foo\n\n--a') - asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n\na', 1)).is_equal_to('--foo\n\na') - asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n \na', 2)).is_equal_to('--foo\n-- \na') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo')).is_equal_to( + '--foo') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\n')).is_equal_to( + '--foo\n') + asserts.assert_that(re.sub('(?m)^(?:$|[^$])', '--', 'foo\na')).is_equal_to( + '--foo\n--a') + asserts.assert_that( + re.sub('(?m)^(?:$|[^$])', '--', 'foo\n\na')).is_equal_to('--foo\n\n--a') + asserts.assert_that( + re.sub('(?m)^(?:$|[^$])', '--', 'foo\n\na', 1)).is_equal_to( + '--foo\n\na') + asserts.assert_that( + re.sub('(?m)^(?:$|[^$])', '--', 'foo\n \na', 2)).is_equal_to( + '--foo\n-- \na') # split def _test_split(): - asserts.assert_that(re.split('x*', 'foo')).is_equal_to(['', 'f', 'o', 'o', '']) - asserts.assert_that(re.split("(?m)^$", "foo\n\nbar\n")).is_equal_to(['foo\n', '\nbar\n', '']) - asserts.assert_that(re.split(r'\W+', 'Words, words, words.')).is_equal_to(['Words', 'words', 'words', '']) - asserts.assert_that(re.split(r'(\W+)', 'Words, words, words.')).is_equal_to(['Words', ', ', 'words', ', ', 'words', '.', '']) - asserts.assert_that(re.split(r'\W+', 'Words, words, words.', 1)).is_equal_to(['Words', 'words, words.']) - asserts.assert_that(re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)).is_equal_to(['0', '3', '9']) - asserts.assert_that(re.split(r'(\W+)', '...words, words...')).is_equal_to(['', '...', 'words', ', ', 'words', '...', '']) - asserts.assert_that(re.split("(b)|(:+)", ":abc")).is_equal_to(['', None, ':', 'a', 'b', None, 
'c']) - # for string in ":a:b::c", S(":a:b::c"): - # self.assertTypedEqual(re.split(":", string), - # ['', 'a', 'b', '', 'c']) - # self.assertTypedEqual(re.split(":+", string), - # ['', 'a', 'b', 'c']) - # self.assertTypedEqual(re.split("(:+)", string), - # ['', ':', 'a', ':', 'b', '::', 'c']) - # for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"), - # memoryview(b":a:b::c")): - # self.assertTypedEqual(re.split(b":", string), - # [b'', b'a', b'b', b'', b'c']) - # self.assertTypedEqual(re.split(b":+", string), - # [b'', b'a', b'b', b'c']) - # self.assertTypedEqual(re.split(b"(:+)", string), - # [b'', b':', b'a', b':', b'b', b'::', b'c']) - # for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432", - # "\U0001d49c\U0001d49e\U0001d4b5"): - # string = ":%s:%s::%s" % (a, b, c) - # self.assertEqual(re.split(":", string), ['', a, b, '', c]) - # self.assertEqual(re.split(":+", string), ['', a, b, c]) - # self.assertEqual(re.split("(:+)", string), - # ['', ':', a, ':', b, '::', c]) - # - # self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c']) - # self.assertEqual(re.split("(:)+", ":a:b::c"), - # ['', ':', 'a', ':', 'b', ':', 'c']) - # self.assertEqual(re.split("([b:]+)", ":a:b::c"), - # ['', ':', 'a', ':b::', 'c']) - # self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), - # ['', None, ':', 'a', None, ':', '', 'b', None, '', - # None, '::', 'c']) - # self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), - # ['', 'a', '', '', 'c']) - + asserts.assert_that(re.split('x*', 'foo')).is_equal_to( + ['', 'f', 'o', 'o', '']) + asserts.assert_that(re.split("(?m)^$", "foo\n\nbar\n")).is_equal_to( + ['foo\n', '\nbar\n', '']) + asserts.assert_that(re.split(r'\W+', 'Words, words, words.')).is_equal_to( + ['Words', 'words', 'words', '']) + asserts.assert_that(re.split(r'(\W+)', 'Words, words, words.')).is_equal_to( + ['Words', ', ', 'words', ', ', 'words', '.', '']) + asserts.assert_that( + re.split(r'\W+', 'Words, words, words.', 1)).is_equal_to( + ['Words', 'words, 
words.']) + asserts.assert_that( + re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)).is_equal_to( + ['0', '3', '9']) + asserts.assert_that(re.split(r'(\W+)', '...words, words...')).is_equal_to( + ['', '...', 'words', ', ', 'words', '...', '']) + asserts.assert_that(re.split("(b)|(:+)", ":abc")).is_equal_to( + ['', None, ':', 'a', 'b', None, 'c']) + + # tests from cpython tests + string = ":a:b::c" + asserts.assert_that(re.split(":", string)).is_equal_to( + ['', 'a', 'b', '', 'c']) + asserts.assert_that(re.split(":+", string)).is_equal_to( + ['', 'a', 'b', 'c']) + asserts.assert_that(re.split("(:+)", string)).is_equal_to( + ['', ':', 'a', ':', 'b', '::', 'c']) + + # for a, b, c in ("\xe0\xdf\xe7", + # "\u0430\u0431\u0432", + # "\U0001d49c\U0001d49e\U0001d4b5"): + # string = ":%s:%s::%s" % (a, b, c) + # asserts.assert_that(re.split(":", string)).is_equal_to( + # ['', a, b, '', c]) + # asserts.assert_that(re.split(":+", string)).is_equal_to(['', a, b, c]) + # asserts.assert_that(re.split("(:+)", string)).is_equal_to( + # ['', ':', a, ':', b, '::', c]) + + asserts.assert_that(re.split("(?::+)", ":a:b::c")).is_equal_to( + ['', 'a', 'b', 'c']) + asserts.assert_that(re.split("(:)+", ":a:b::c")).is_equal_to( + ['', ':', 'a', ':', 'b', ':', 'c']) + asserts.assert_that(re.split("([b:]+)", ":a:b::c")).is_equal_to( + ['', ':', 'a', ':b::', 'c']) + asserts.assert_that(re.split("(b)|(:+)", ":a:b::c")).is_equal_to( + ['', None, ':', 'a', None, ':', '', 'b', None, '', None, '::', 'c']) + asserts.assert_that(re.split("(?:b)|(?::+)", ":a:b::c")).is_equal_to( + ['', 'a', '', '', 'c']) # findall def _test_findall(): text = "He was carefully disguised but captured quickly by police." - asserts.assert_that(re.findall(r"\w+ly", text)).is_equal_to(['carefully', 'quickly']) + asserts.assert_that(re.findall(r"\w+ly", text)).is_equal_to( + ['carefully', 'quickly']) text = "He was carefully disguised but captured quickly by police." 
- asserts.assert_that(re.findall(r"(\w+)(ly)", text)).is_equal_to([('careful', 'ly'), ('quick', 'ly')]) + asserts.assert_that(re.findall(r"(\w+)(ly)", text)).is_equal_to( + [('careful', 'ly'), ('quick', 'ly')]) text = "He was carefully disguised but captured quickly by police." - asserts.assert_that(re.findall(r"(\w+)ly", text)).is_equal_to(['careful', 'quick']) + asserts.assert_that(re.findall(r"(\w+)ly", text)).is_equal_to( + ['careful', 'quick']) r = re.compile(r"\w+ly") text = "carefully disguised but captured quickly by police." @@ -136,7 +157,7 @@ def _test_findall(): asserts.assert_that(re.findall(r"\b", "a")).is_equal_to(['', '']) # handling of empty matches - indent_re = re.compile(r'^([ ]*)(?=\S)', re.MULTILINE) + indent_re = re.compile(r'^([ ]*)(?:\S)', re.MULTILINE) s = "line number one\nline number two" asserts.assert_that(indent_re.findall(s)).is_equal_to(['', '']) @@ -145,17 +166,21 @@ def _test_findall(): def _test_finditer(): # based on CPython's test_re.py iter = re.finditer(r":+", "a:b::c:::d") - asserts.assert_that([item.group(0) for item in iter]).is_equal_to([":", "::", ":::"]) + asserts.assert_that([item.group(0) for item in iter]).is_equal_to( + [":", "::", ":::"]) pat = re.compile(r":+") iter = pat.finditer("a:b::c:::d", 3, 8) - asserts.assert_that([item.group(0) for item in iter]).is_equal_to(["::", "::"]) + asserts.assert_that([item.group(0) for item in iter]).is_equal_to( + ["::", "::"]) s = "line one\nline two\n 3" iter = re.finditer(r"^ *", s, re.MULTILINE) asserts.assert_that([m.group() for m in iter]).is_equal_to(["", "", " "]) - asserts.assert_that([m.group() for m in re.finditer(r".*", "asdf")]).is_equal_to(["asdf", ""]) + asserts.assert_that( + [m.group() for m in re.finditer(r".*", "asdf")]).is_equal_to( + ["asdf", ""]) def _suite(): @@ -167,11 +192,12 @@ def _suite(): _suite.addTest(unittest.FunctionTestCase(_test_sub)) _suite.addTest(unittest.FunctionTestCase(_test_subn)) # currently not supported! 
- #_suite.addTest(unittest.FunctionTestCase(_test_zero_length_matches)) + # _suite.addTest(unittest.FunctionTestCase(_test_zero_length_matches)) _suite.addTest(unittest.FunctionTestCase(_test_split)) _suite.addTest(unittest.FunctionTestCase(_test_findall)) _suite.addTest(unittest.FunctionTestCase(_test_finditer)) return _suite + _runner = unittest.TextTestRunner() -_runner.run(_suite()) \ No newline at end of file +_runner.run(_suite()) From e914ea9649810db3dda04d1b5e5729c0bc81502c Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Sun, 28 Feb 2021 19:12:44 -0800 Subject: [PATCH 8/9] Allow building java 1.8 with java 11 and allow building with java8 as well --- larky/pom.xml | 16 -- .../com/verygood/security/larky/Larky.java | 12 +- .../larky/nativelib/test/UnittestModule.java | 8 +- .../security/larky/parser/LarkyEvaluator.java | 3 +- .../larky/parser/PrependMergedStarFile.java | 10 +- pom.xml | 139 ++++++++++++++---- runlarky/pom.xml | 2 +- 7 files changed, 134 insertions(+), 56 deletions(-) diff --git a/larky/pom.xml b/larky/pom.xml index 64bb2aa40..37d4fa45e 100644 --- a/larky/pom.xml +++ b/larky/pom.xml @@ -10,22 +10,6 @@ 4.0.0 larky - - - - org.apache.maven.plugins - maven-compiler-plugin - - 11 - 11 - - - - - - - true - diff --git a/larky/src/main/java/com/verygood/security/larky/Larky.java b/larky/src/main/java/com/verygood/security/larky/Larky.java index e8f8522d8..01aa4cf45 100644 --- a/larky/src/main/java/com/verygood/security/larky/Larky.java +++ b/larky/src/main/java/com/verygood/security/larky/Larky.java @@ -1,6 +1,8 @@ package com.verygood.security.larky; +import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.common.annotations.VisibleForTesting; import net.starlark.java.eval.EvalException; @@ -17,16 +19,16 @@ import net.starlark.java.syntax.SyntaxError; import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; +import 
java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.time.Duration; -import static java.nio.charset.StandardCharsets.UTF_8; - public class Larky { private static final String START_PROMPT = ">> "; @@ -143,7 +145,11 @@ static int execute(ParserInput input) { } static void writeOutput(String outputFile, StarlarkValue returnValue) throws IOException { - Files.writeString(Paths.get(outputFile), returnValue.toString(), StandardOpenOption.CREATE); + try (BufferedWriter bw = Files.newBufferedWriter(Paths.get(outputFile), + Charset.defaultCharset(), + StandardOpenOption.CREATE)) { + bw.write(returnValue.toString()); + } } public static void main(String[] args) throws Exception { diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java b/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java index b0bbbfe72..04ecd2909 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java +++ b/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java @@ -132,12 +132,14 @@ public void runSuiteTest(Object suiteTest) throws EvalException { TestResult result = doRun(suite); if(!result.wasSuccessful()) { Iterator it = Iterators.concat( - result.errors().asIterator(), - result.failures().asIterator()); + Iterators.forEnumeration(result.errors()), + Iterators.forEnumeration(result.failures()) + ); //noinspection LoopStatementThatDoesntLoop while (it.hasNext()) { TestFailure f = it.next(); - throw Starlark.errorf(f.trace()); + //final String testFailureWithTrace = f.trace(); + throw Starlark.errorf("%s", f.trace()); } } } diff --git a/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java b/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java index 29e45684d..3b902a3f5 100644 --- 
a/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java +++ b/larky/src/main/java/com/verygood/security/larky/parser/LarkyEvaluator.java @@ -31,6 +31,7 @@ import java.net.URISyntaxException; import java.net.URL; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashSet; @@ -254,7 +255,7 @@ private Path getStdlibPath() { return null; } - return Path.of(resourceAsURI); + return Paths.get(resourceAsURI); } @SuppressWarnings("UnstableApiUsage") diff --git a/larky/src/main/java/com/verygood/security/larky/parser/PrependMergedStarFile.java b/larky/src/main/java/com/verygood/security/larky/parser/PrependMergedStarFile.java index c2993a72b..85006ef4a 100644 --- a/larky/src/main/java/com/verygood/security/larky/parser/PrependMergedStarFile.java +++ b/larky/src/main/java/com/verygood/security/larky/parser/PrependMergedStarFile.java @@ -1,14 +1,16 @@ package com.verygood.security.larky.parser; +import com.google.common.collect.Lists; + import java.io.IOException; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.List; - import lombok.SneakyThrows; public class PrependMergedStarFile implements StarFile { - private List PRELOADER_PREFIXES = List.of(" ", "#", "load"); + private List PRELOADER_PREFIXES = Lists.newArrayList(" ", "#", "load"); private String content; public PrependMergedStarFile(String scriptFile) { @@ -39,8 +41,8 @@ public PrependMergedStarFile(String input, String script) { @Override public StarFile resolve(String path) { Path resolved = StarFile.isAbsolute(path) - ? Path.of(path) - : Path.of(getClass().getClassLoader().getResource(path).toURI()); + ? 
Paths.get(path) + : Paths.get(getClass().getClassLoader().getResource(path).toURI()); return new PathBasedStarFile(resolved, null, null); } diff --git a/pom.xml b/pom.xml index cfc0f2711..0b3819f97 100644 --- a/pom.xml +++ b/pom.xml @@ -100,22 +100,29 @@ - com.google.re2j - re2j - ${google.re2j.version} + com.google.re2j + re2j + ${google.re2j.version} + - com.google.crypto.tink - tink - ${google.crypto.tink} + com.google.crypto.tink + tink + ${google.crypto.tink} - org.conscrypt - conscrypt-openjdk - ${org.conscrypt.version} - ${os.detected.classifier} + org.conscrypt + conscrypt-openjdk + ${org.conscrypt.version} + ${os.detected.classifier} + + + + org.bouncycastle + bcprov-ext-jdk15to18 + ${org.bouncycastle.version} @@ -155,13 +162,51 @@ + org.apache.maven.plugins maven-compiler-plugin ${maven.compiler.plugin.version} + true ${maven.compiler.source} ${maven.compiler.target} + true + true + false + false + + -Xlint:unchecked + + + -XDcompilePolicy=simple + + + -Xplugin:ErrorProne -Xep:MissingOverride:OFF -Xep:MixedMutabilityReturnType:OFF -Xep:UnnecessaryAnonymousClass:OFF -Xep:PreferJavaTimeOverload:OFF + + + + com.google.errorprone + error_prone_core + ${google.errorprone.version} + + + org.projectlombok + lombok + ${org.projectlombok.version} + + + com.google.auto.value + auto-value + ${google.auto.value.version} + + @@ -169,49 +214,87 @@ maven-source-plugin ${maven.source.plugin.version} - - - - - + + kr.motd.maven + os-maven-plugin + ${os-maven-plugin.version} + + + initialize + + detect + + + + - - - kr.motd.maven - os-maven-plugin - ${kr.motd.maven.os-maven-plugin.version} - - + + + jdk8 + + 1.8 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + true + + + -J-Xbootclasspath/p:${settings.localRepository}/com/google/errorprone/javac/${google.errorprone.javac.version}/javac-${google.errorprone.javac.version}.jar + + + + + + maven-surefire-plugin + ${surefire-plugin.version} + + 
-Xbootclasspath/p:${settings.localRepository}/com/google/errorprone/javac/${google.errorprone.javac.version}/javac-${google.errorprone.javac.version}.jar + false + + + + + + + + + 8 + 8 3.8.1 2.22.1 3.6.2 3.2.1 - 2.5.0 - 1.7.4 + 2.5.1 + 9+181-r4173-1 + 1.7 0.18 1.5.0 0.5.1 - 29.0-jre + 30.1-jre 1.1 1.5 1.0.1 2.3.0 2.5.1 + 1.68 20.1.0 4.13.1 3.5.13 1.18.12 1.7.30 - 1.6.1 + 1.7.0 0.6.1 3.14.0 UTF-8 UTF-8 - 8 - 8 UTF-8 diff --git a/runlarky/pom.xml b/runlarky/pom.xml index 5604390da..4ee0a63bb 100644 --- a/runlarky/pom.xml +++ b/runlarky/pom.xml @@ -122,7 +122,7 @@ ${project.build.directory}/${project.build.finalName}-runner org.jboss.logmanager.LogManager - ${maven.home} + @{maven.home} From 2b52b972de8dd23bcf5c82a47c979e620464eeba Mon Sep 17 00:00:00 2001 From: Mahmoud Abdelkader Date: Mon, 1 Mar 2021 14:24:39 -0800 Subject: [PATCH 9/9] Restructure the larky project. Fixes #18. --- .../security/larky/ModuleSupplier.java | 30 +- .../C99MathModule.java} | 6 +- .../Hashlib.java => modules/HashModule.java} | 6 +- .../std/Json.java => modules/JsonModule.java} | 10 +- .../ProtoBufModule.java} | 8 +- .../larky/{nativelib => modules}/README.md | 7 + .../security/larky/modules/RegexModule.java | 33 + .../globals}/LarkyGlobals.java | 11 +- .../globals}/PythonBuiltins.java | 2 +- .../larky/modules/re/RegexMatcher.java | 317 ++++++++++ .../larky/modules/re/RegexPattern.java | 261 ++++++++ .../testing/AssertionsModule.java} | 6 +- .../security/larky/modules/testing/README.md | 5 + .../testing}/UnittestModule.java | 13 +- .../structs => modules/types}/Partial.java | 2 +- .../types/Property.java} | 19 +- .../types}/hashing/HashObject.java | 2 +- .../types}/structs/ImmutableStruct.java | 2 +- .../types}/structs/MutableStruct.java | 12 +- .../types}/structs/SimpleStruct.java | 56 +- .../structs/SimpleStructWithMethods.java | 58 ++ .../larky/nativelib/std/RE2RegexEngine.java | 581 ------------------ .../security/larky/utils/NullPrintStream.java | 12 + 
.../resources/stdlib/vendor/pycryptodome.star | 0 .../verygood/security/larky/LarkyTest.java | 6 +- .../verygood/security/larky/ScriptTest.java | 4 +- 26 files changed, 753 insertions(+), 716 deletions(-) rename larky/src/main/java/com/verygood/security/larky/{nativelib/std/C99Math.java => modules/C99MathModule.java} (95%) rename larky/src/main/java/com/verygood/security/larky/{nativelib/std/Hashlib.java => modules/HashModule.java} (89%) rename larky/src/main/java/com/verygood/security/larky/{nativelib/std/Json.java => modules/JsonModule.java} (98%) rename larky/src/main/java/com/verygood/security/larky/{nativelib/std/Proto.java => modules/ProtoBufModule.java} (96%) rename larky/src/main/java/com/verygood/security/larky/{nativelib => modules}/README.md (87%) create mode 100644 larky/src/main/java/com/verygood/security/larky/modules/RegexModule.java rename larky/src/main/java/com/verygood/security/larky/{nativelib => modules/globals}/LarkyGlobals.java (90%) rename larky/src/main/java/com/verygood/security/larky/{nativelib => modules/globals}/PythonBuiltins.java (98%) create mode 100644 larky/src/main/java/com/verygood/security/larky/modules/re/RegexMatcher.java create mode 100644 larky/src/main/java/com/verygood/security/larky/modules/re/RegexPattern.java rename larky/src/main/java/com/verygood/security/larky/{nativelib/test/LarkyAssertions.java => modules/testing/AssertionsModule.java} (92%) create mode 100644 larky/src/main/java/com/verygood/security/larky/modules/testing/README.md rename larky/src/main/java/com/verygood/security/larky/{nativelib/test => modules/testing}/UnittestModule.java (94%) rename larky/src/main/java/com/verygood/security/larky/{stdtypes/structs => modules/types}/Partial.java (97%) rename larky/src/main/java/com/verygood/security/larky/{nativelib/LarkyProperty.java => modules/types/Property.java} (87%) rename larky/src/main/java/com/verygood/security/larky/{stdtypes => modules/types}/hashing/HashObject.java (96%) rename 
larky/src/main/java/com/verygood/security/larky/{stdtypes => modules/types}/structs/ImmutableStruct.java (74%) rename larky/src/main/java/com/verygood/security/larky/{stdtypes => modules/types}/structs/MutableStruct.java (77%) rename larky/src/main/java/com/verygood/security/larky/{stdtypes => modules/types}/structs/SimpleStruct.java (52%) create mode 100644 larky/src/main/java/com/verygood/security/larky/modules/types/structs/SimpleStructWithMethods.java delete mode 100644 larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java create mode 100644 larky/src/main/java/com/verygood/security/larky/utils/NullPrintStream.java create mode 100644 larky/src/main/resources/stdlib/vendor/pycryptodome.star diff --git a/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java b/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java index 6b28c46e8..63cbdc652 100644 --- a/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java +++ b/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java @@ -20,15 +20,15 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import com.verygood.security.larky.nativelib.LarkyGlobals; -import com.verygood.security.larky.nativelib.PythonBuiltins; -import com.verygood.security.larky.nativelib.std.C99Math; -import com.verygood.security.larky.nativelib.std.Hashlib; -import com.verygood.security.larky.nativelib.std.Json; -import com.verygood.security.larky.nativelib.std.Proto; -import com.verygood.security.larky.nativelib.std.RE2RegexEngine; -import com.verygood.security.larky.nativelib.test.LarkyAssertions; -import com.verygood.security.larky.nativelib.test.UnittestModule; +import com.verygood.security.larky.modules.ProtoBufModule; +import com.verygood.security.larky.modules.globals.LarkyGlobals; +import com.verygood.security.larky.modules.globals.PythonBuiltins; +import com.verygood.security.larky.modules.C99MathModule; +import 
com.verygood.security.larky.modules.HashModule; +import com.verygood.security.larky.modules.JsonModule; +import com.verygood.security.larky.modules.RegexModule; +import com.verygood.security.larky.modules.testing.AssertionsModule; +import com.verygood.security.larky.modules.testing.UnittestModule; import net.starlark.java.annot.StarlarkBuiltin; import net.starlark.java.eval.StarlarkValue; @@ -47,16 +47,16 @@ public class ModuleSupplier { ); public static final ImmutableSet STD_MODULES = ImmutableSet.of( - Json.INSTANCE, - Proto.INSTANCE, - Hashlib.INSTANCE, - C99Math.INSTANCE, - RE2RegexEngine.INSTANCE + JsonModule.INSTANCE, + ProtoBufModule.INSTANCE, + HashModule.INSTANCE, + C99MathModule.INSTANCE, + RegexModule.INSTANCE ); public static final ImmutableSet TEST_MODULES = ImmutableSet.of( UnittestModule.INSTANCE, - LarkyAssertions.INSTANCE + AssertionsModule.INSTANCE ); private final Map environment; diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/C99Math.java b/larky/src/main/java/com/verygood/security/larky/modules/C99MathModule.java similarity index 95% rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/C99Math.java rename to larky/src/main/java/com/verygood/security/larky/modules/C99MathModule.java index 2d1b7c8e4..88af84970 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/C99Math.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/C99MathModule.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.nativelib.std; +package com.verygood.security.larky.modules; import com.google.common.math.DoubleMath; @@ -14,9 +14,9 @@ name = "c99math", category = "BUILTIN", doc = "This module provides access to the mathematical functions defined by the C99 standard") -public class C99Math implements StarlarkValue { +public class C99MathModule implements StarlarkValue { - public static final C99Math INSTANCE = new C99Math(); + public static final C99MathModule INSTANCE = new 
C99MathModule(); @StarlarkMethod(name = "PI", doc = "a constant pi", structField = true) public StarlarkFloat PI_CONSTANT() { diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Hashlib.java b/larky/src/main/java/com/verygood/security/larky/modules/HashModule.java similarity index 89% rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/Hashlib.java rename to larky/src/main/java/com/verygood/security/larky/modules/HashModule.java index 02421321f..f3997fce8 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Hashlib.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/HashModule.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.nativelib.std; +package com.verygood.security.larky.modules; import com.google.common.hash.HashCode; import com.google.common.hash.Hashing; @@ -14,9 +14,9 @@ name = "hashlib", category = "BUILTIN", doc = "This module implements a common interface to many different secure hash and message digest algorithms.") -public class Hashlib implements StarlarkValue { +public class HashModule implements StarlarkValue { - public static final Hashlib INSTANCE = new Hashlib(); + public static final HashModule INSTANCE = new HashModule(); @StarlarkMethod( name = "md5", diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java b/larky/src/main/java/com/verygood/security/larky/modules/JsonModule.java similarity index 98% rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java rename to larky/src/main/java/com/verygood/security/larky/modules/JsonModule.java index cc360c06d..d10fd6d59 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/JsonModule.java @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package com.verygood.security.larky.nativelib.std; +package com.verygood.security.larky.modules; import net.starlark.java.annot.Param; import net.starlark.java.annot.StarlarkBuiltin; @@ -35,14 +35,14 @@ // Tests at //src/test/java/net/starlark/java/eval:testdata/json.sky /** - * Json defines the Starlark {@code json} module, which provides functions for encoding/decoding + * JsonModule defines the Starlark {@code json} module, which provides functions for encoding/decoding * Starlark values as JSON (https://tools.ietf.org/html/rfc8259). */ @StarlarkBuiltin( name = "json", category = "core.lib", doc = "Module json is a Starlark module of JSON-related functions.") -public final class Json implements StarlarkValue { +public final class JsonModule implements StarlarkValue { //@formatter:off private static final String _METHOD_ENCODE_DOCUMENTATION = @@ -82,13 +82,13 @@ public final class Json implements StarlarkValue { + "\n" + "Decoding fails if x is not a valid JSON encoding.\n"; //@formatter:on - private Json() {} + private JsonModule() {} /** * The module instance. You may wish to add this to your predeclared environment under the name * "json". */ - public static final Json INSTANCE = new Json(); + public static final JsonModule INSTANCE = new JsonModule(); /** An interface for StarlarkValue subclasses to define their own JSON encoding. 
*/ public interface Encodable { diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Proto.java b/larky/src/main/java/com/verygood/security/larky/modules/ProtoBufModule.java similarity index 96% rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/Proto.java rename to larky/src/main/java/com/verygood/security/larky/modules/ProtoBufModule.java index 00dff8952..6cfa1516d 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Proto.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/ProtoBufModule.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.nativelib.std; +package com.verygood.security.larky.modules; import net.starlark.java.annot.Param; import net.starlark.java.annot.StarlarkBuiltin; @@ -16,7 +16,7 @@ import java.util.Map; /** - * Proto defines the "proto" Starlark module of utilities for protocol message processing. + * ProtoBufModule defines the "proto" Starlark module of utilities for protocol message processing. * * This file is copied currently from Bazel's: * com.google.devtools.build.lib.packages.StarlarkLibrary @@ -25,13 +25,13 @@ name = "proto", category = "BUILTIN", doc = "A module for protocol message processing.") -public final class Proto implements StarlarkValue { +public final class ProtoBufModule implements StarlarkValue { // Note: in due course this is likely to move to net.starlark.java.lib.proto. // Do not add functions that would not belong there! // Functions related to running the protocol compiler belong in proto_common. 
- public static final Proto INSTANCE = new Proto(); + public static final ProtoBufModule INSTANCE = new ProtoBufModule(); @StarlarkMethod( name = "encode_text", diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/README.md b/larky/src/main/java/com/verygood/security/larky/modules/README.md similarity index 87% rename from larky/src/main/java/com/verygood/security/larky/nativelib/README.md rename to larky/src/main/java/com/verygood/security/larky/modules/README.md index 3a16a420e..678626c8a 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/README.md +++ b/larky/src/main/java/com/verygood/security/larky/modules/README.md @@ -25,3 +25,10 @@ emulate it by: ``` Obviously, range can take a larger number to emulate infinity. + +### Native Module + +Source files for standard library _extension_ modules. + +These are *NOT* built-in modules, but are basically extension wrappers that help +implement the standard library. \ No newline at end of file diff --git a/larky/src/main/java/com/verygood/security/larky/modules/RegexModule.java b/larky/src/main/java/com/verygood/security/larky/modules/RegexModule.java new file mode 100644 index 000000000..85d232d75 --- /dev/null +++ b/larky/src/main/java/com/verygood/security/larky/modules/RegexModule.java @@ -0,0 +1,33 @@ +package com.verygood.security.larky.modules; + +import com.verygood.security.larky.modules.re.RegexPattern; + +import net.starlark.java.annot.StarlarkBuiltin; +import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.StarlarkValue; + + +@StarlarkBuiltin( + name = "re2j", + category = "BUILTIN", + doc = "This module provides access to the linear regular expression matching engine.\n" + + "\n" + + "This package provides an implementation of regular expression matching based on Russ Cox's linear-time RE2 algorithm.\n" + + "\n" + + "The API presented by com.google.re2j mimics that of java.util.regex.Matcher and java.util.regex.Pattern. 
While not identical, they are similar enough that most users can switch implementations simply by changing their imports.\n" + + "\n" + + "The syntax of the regular expressions accepted is the same general syntax used by Perl, Python, and other languages. More precisely, it is the syntax accepted by the C++ and Go implementations of RE2 described at https://github.com/google/re2/wiki/Syntax, except for \\C (match any byte), which is not supported because in this implementation, the matcher's input is conceptually a stream of Unicode code points, not bytes.\n" + + "\n" + + "The current API is rather small and intended for compatibility with java.util.regex, but the underlying implementation supports some additional features, such as the ability to process input character streams encoded as UTF-8 byte arrays. These may be exposed in a future release if there is sufficient interest." + + "\n" + + "More on syntax here: https://github.com/google/re2/wiki/Syntax") +public class RegexModule implements StarlarkValue { + + public static final RegexModule INSTANCE = new RegexModule(); + + private static final RegexPattern _Pattern = new RegexPattern(); + + @StarlarkMethod(name = "Pattern", doc = "pattern", structField = true) + public static RegexPattern Pattern() { return _Pattern; } + +} diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/LarkyGlobals.java b/larky/src/main/java/com/verygood/security/larky/modules/globals/LarkyGlobals.java similarity index 90% rename from larky/src/main/java/com/verygood/security/larky/nativelib/LarkyGlobals.java rename to larky/src/main/java/com/verygood/security/larky/modules/globals/LarkyGlobals.java index 5af7e1af2..3b7c6b514 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/LarkyGlobals.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/globals/LarkyGlobals.java @@ -1,9 +1,10 @@ -package com.verygood.security.larky.nativelib; +package com.verygood.security.larky.modules.globals; 
import com.verygood.security.larky.annot.Library; import com.verygood.security.larky.annot.StarlarkConstructor; -import com.verygood.security.larky.stdtypes.structs.Partial; -import com.verygood.security.larky.stdtypes.structs.SimpleStruct; +import com.verygood.security.larky.modules.types.Property; +import com.verygood.security.larky.modules.types.Partial; +import com.verygood.security.larky.modules.types.structs.SimpleStruct; import net.starlark.java.annot.Param; import net.starlark.java.annot.ParamType; @@ -107,8 +108,8 @@ public Partial partial(StarlarkFunction function, Tuple args, Dict kwargs, StarlarkThread thread) { - return LarkyProperty.builder() + public Property property(StarlarkCallable getter, Object setter, Tuple args, Dict kwargs, StarlarkThread thread) { + return Property.builder() .thread(thread) .fget(getter) .fset(setter != Starlark.NONE ? (StarlarkCallable) setter : null) diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java b/larky/src/main/java/com/verygood/security/larky/modules/globals/PythonBuiltins.java similarity index 98% rename from larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java rename to larky/src/main/java/com/verygood/security/larky/modules/globals/PythonBuiltins.java index 380878e46..5c44120cc 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/globals/PythonBuiltins.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.nativelib; +package com.verygood.security.larky.modules.globals; import com.verygood.security.larky.annot.Library; diff --git a/larky/src/main/java/com/verygood/security/larky/modules/re/RegexMatcher.java b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexMatcher.java new file mode 100644 index 000000000..bedf56e14 --- /dev/null +++ b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexMatcher.java @@ -0,0 +1,317 @@ 
+package com.verygood.security.larky.modules.re; + +import com.google.common.base.Joiner; +import com.google.re2j.Matcher; + +import com.verygood.security.larky.parser.StarlarkUtil; + +import net.starlark.java.annot.Param; +import net.starlark.java.annot.ParamType; +import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.EvalException; +import net.starlark.java.eval.NoneType; +import net.starlark.java.eval.Starlark; +import net.starlark.java.eval.StarlarkInt; +import net.starlark.java.eval.StarlarkList; +import net.starlark.java.eval.StarlarkValue; + +import java.util.Arrays; + +public class RegexMatcher implements StarlarkValue { + private final Matcher matcher; + private final RegexPattern pattern; + + RegexMatcher(Matcher matcher) { + this.matcher = matcher; + this.pattern = new RegexPattern().pattern(matcher.pattern()); + } + + RegexMatcher(Matcher matcher, RegexPattern pattern) { + this.matcher = matcher; + this.pattern = pattern; + } + + @StarlarkMethod( + name = "pattern", + doc = "Returns the RegexPattern associated with this RegexMatcher.\n" + ) + public RegexPattern pattern() { + return pattern; + } + + @StarlarkMethod( + name = "reset", + doc = "Resets the RegexMatcher, rewinding input and discarding any match information.\n", + parameters = { + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + @ParamType(type = NoneType.class) + }, + defaultValue = "None" + ) + } + ) + public RegexMatcher reset(Object input) { + if (NoneType.class.isAssignableFrom(input.getClass())) { + matcher.reset(); + } else if (String.class.isAssignableFrom(input.getClass())) { + matcher.reset(String.valueOf(input)); + } + return this; + } + + @StarlarkMethod( + name = "start", + doc = "Returns the start position of the most recent match." 
+ + "\n" + + "Accepts a group index position, or defaults to 0 if it's the overall match.", + parameters = { + @Param( + name = "index", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + }, + defaultValue = "0" + ) + } + ) + public StarlarkInt start(StarlarkInt index) { + return StarlarkInt.of(matcher.start(index.toIntUnchecked())); + } + + @StarlarkMethod( + name = "end", + doc = "Returns the end position of the most recent match." + + "\n" + + "Accepts a group index position, or defaults to 0 if it's the overall match.", + parameters = { + @Param( + name = "index", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + }, + defaultValue = "0" + ) + } + ) + public StarlarkInt end(StarlarkInt index) { + return StarlarkInt.of(matcher.end(index.toIntUnchecked())); + } + + @StarlarkMethod( + name = "group", + doc = "Returns the most recent match." + + "\n" + + "If no argument or None is passed in, returns the most recent match, or " + + "null if the group was not matched." + + "\n" + + "If a valid integer is returned, returns the subgroup of the most recent match." 
+ + "\n" + + "Throws an exception if group < 0 or group > group_count()", + parameters = { + @Param( + name = "group", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + @ParamType(type = String.class), + @ParamType(type = NoneType.class), + }, + defaultValue = "None") + }) + public Object group(Object group) { + String g; + if (Starlark.isNullOrNone(group)) { + g = matcher.group(); + } else if (StarlarkInt.class.isAssignableFrom(group.getClass())) { + g = matcher.group(((StarlarkInt) group).toIntUnchecked()); + } + // default case + else { + g = matcher.group(String.valueOf(group)); + } + + if (g == null) { + return Starlark.NONE; + } + return g; + + } + + @StarlarkMethod( + name = "group_count", + doc = "Returns the number of subgroups in this pattern.\n" + + "the number of subgroups; the overall match (group 0) does not count\n" + ) + public StarlarkInt groupCount() { + return StarlarkInt.of(matcher.groupCount()); + } + + @StarlarkMethod( + name = "matches", + doc = "Matches the entire input against the pattern (anchored start and end). " + + "If there is a match, matches sets the match state to describe it.\n" + + "the number of subgroups; the overall match (group 0) does not count\n" + + "\n" + + "Returns: true if the entire input matches the pattern" + ) + public boolean matches() { + return matcher.matches(); + } + + @StarlarkMethod( + name = "looking_at", + doc = "Matches the beginning of input against the pattern (anchored start). " + + "If there is a match, looking_at sets the match state to describe it." + + "\n" + + "Returns true if the beginning of the input matches the pattern\n" + ) + public boolean lookingAt() { + return matcher.lookingAt(); + } + + @StarlarkMethod( + name = "find", + doc = "Matches the input against the pattern (unanchored), starting at a specified position." + + " If there is a match, find sets the match state to describe it." 
+ + "\n" + + "start - the input position where the search begins\n" + + "\n" + + "Returns true if it finds a match or throw if start is not a valid input position\n", + parameters = { + @Param( + name = "start", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + @ParamType(type = NoneType.class), + }, + defaultValue = "None" + ) + } + ) + public boolean find(Object start) { + if (Starlark.isNullOrNone(start)) { + return matcher.find(); + } + StarlarkInt s = (StarlarkInt) StarlarkUtil.valueToStarlark(start); + return matcher.find(s.toIntUnchecked()); + } + + @StarlarkMethod( + name = "quote_replacement", + doc = "Quotes '\\' and '$' in s, so that the returned string could be used in " + + "append_replacement(appendable_string, s) as a literal replacement of s.\n" + + "\n" + + "Returns: the quoted string", + parameters = { + @Param( + name = "s", + allowedTypes = { + @ParamType(type = String.class), + } + ) + } + ) + public static String quoteReplacement(String s) { + return Matcher.quoteReplacement(s); + } + + @StarlarkMethod( + name = "append_replacement", + doc = "Appends to sb two strings: the text from the append position up to the " + + "beginning of the most recent match, and then the replacement with submatch groups" + + " substituted for references of the form $n, where n is the group number in decimal" + + ". It advances the append position to where the most recent match ended." + + "\n" + + "To embed a literal $, use \\$ (actually \"\\\\$\" with string escapes). The " + + "escape is only necessary when $ is followed by a digit, but it is always allowed. " + + "Only $ and \\ need escaping, but any character can be escaped." + + "\n" + + "\n" + + "The group number n in $n is always at least one digit and expands to use more " + + "digits as long as the resulting number is a valid group number for this pattern. " + + "To cut it off earlier, escape the first digit that should not be used." 
+ + "\n" + + "Returns: the Matcher itself, for chained method calls\n", + parameters = { + @Param( + name = "sb", + allowedTypes = { + @ParamType(type = StarlarkList.class), + } + ), + @Param( + name = "replacement", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public RegexMatcher appendReplacement(StarlarkList sb, String replacement) { + StringBuilder builder = new StringBuilder().append(Joiner.on("").join(sb)); + matcher.appendReplacement(builder, replacement); + try { + sb.clearElements(); + sb.addElements(Arrays.asList(builder.toString().split(""))); + } catch (EvalException e) { + throw new RuntimeException(e); + } + return this; + } + + @StarlarkMethod( + name = "append_tail", + doc = "Appends to sb the substring of the input from the append position to the " + + "end of the input." + + "\n" + + "Returns the argument sb, for method chaining\n", + parameters = { + @Param( + name = "s", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public String appendTail(String s) { + return matcher.appendTail(new StringBuilder().append(s)).toString(); + } + + @StarlarkMethod( + name = "replace_all", + doc = "Returns the input with all matches replaced by replacement, interpreted as for" + + " append_replacement." 
+ + "\n" + + "The input string with the matches replaced\n", + parameters = { + @Param( + name = "replacement", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public String replaceAll(String replacement) { + return matcher.replaceAll(replacement); + } + + @StarlarkMethod( + name = "replace_first", + doc = "Returns the input with the first match replaced by replacement, " + + "interpreted as for append_replacement.\n" + + "\n" + + "The input string with the first matches replaced\n", + parameters = { + @Param( + name = "replacement", + allowedTypes = { + @ParamType(type = String.class), + } + )} + ) + public String replaceFirst(String replacement) { + return matcher.replaceFirst(replacement); + } + +} diff --git a/larky/src/main/java/com/verygood/security/larky/modules/re/RegexPattern.java b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexPattern.java new file mode 100644 index 000000000..b70a93500 --- /dev/null +++ b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexPattern.java @@ -0,0 +1,261 @@ +package com.verygood.security.larky.modules.re; + +import com.google.re2j.Matcher; +import com.google.re2j.Pattern; + +import net.starlark.java.annot.Param; +import net.starlark.java.annot.ParamType; +import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.Starlark; +import net.starlark.java.eval.StarlarkInt; +import net.starlark.java.eval.StarlarkList; +import net.starlark.java.eval.StarlarkValue; + +import java.util.ArrayList; +import java.util.Arrays; + +// java <> larky objects +public class RegexPattern implements StarlarkValue { + + @StarlarkMethod(name = "CASE_INSENSITIVE", doc = "Flag: case insensitive matching.", structField = true) + public StarlarkInt CASE_INSENSITIVE() { + return StarlarkInt.of(Pattern.CASE_INSENSITIVE); + } + + @StarlarkMethod(name = "DISABLE_UNICODE_GROUPS", doc = "Flag: Unicode groups (e.g. 
\\p\\ Greek\\ ) will be syntax errors", structField = true) + public StarlarkInt DISABLE_UNICODE_GROUPS() { + return StarlarkInt.of(Pattern.DISABLE_UNICODE_GROUPS); + } + + @StarlarkMethod(name = "DOTALL", doc = "Flag: dot (.) matches all characters, including newline.", structField = true) + public StarlarkInt DOTALL() { + return StarlarkInt.of(Pattern.DOTALL); + } + + @StarlarkMethod(name = "LONGEST_MATCH", doc = "Flag: matches longest possible string.", structField = true) + public StarlarkInt LONGEST_MATCH() { + return StarlarkInt.of(Pattern.LONGEST_MATCH); + } + + @StarlarkMethod(name = "MULTILINE", doc = "Flag: multiline matching: ^ and $ match at beginning and end of line, not just beginning and end of input.", structField = true) + public StarlarkInt MULTILINE() { + return StarlarkInt.of(Pattern.MULTILINE); + } + + private Pattern pattern; + + protected RegexPattern pattern(Pattern pattern) { + this.pattern = pattern; + return this; + } + + @StarlarkMethod( + name = "compile", + doc = "Creates and returns a new Pattern corresponding to compiling regex with the given flags." 
+ + "If flags is not passed, it defaults to 0", + parameters = { + @Param(name = "regex"), + @Param( + name = "flags", + allowedTypes = { + @ParamType(type = StarlarkInt.class), + }, + defaultValue = "0") + }) + public static RegexPattern compile(String regex, StarlarkInt flags) { + int flag = flags.toIntUnchecked(); + return new RegexPattern().pattern(Pattern.compile(regex, flag)); + } + + @StarlarkMethod( + name = "matches", + doc = "Matches a string against a regular expression.", + parameters = { + @Param(name = "regex"), + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + }) + }) + public static boolean matches(String regex, String input) { + return Pattern.matches(regex, input); + } + + @StarlarkMethod( + name = "quote", + doc = "", + parameters = { + @Param( + name = "s", + allowedTypes = { + @ParamType(type = String.class), + }) + }) + public static String quote(String s) { + return Pattern.quote(s); + } + + @StarlarkMethod( + name = "flags", + doc = "" + ) + public StarlarkInt flags() { + return StarlarkInt.of(pattern.flags()); + } + + @StarlarkMethod(name = "pattern", doc = "") + public String pattern() { + return pattern.pattern(); + } + + @StarlarkMethod( + name = "matcher", + doc = "Creates a new Matcher matching the pattern against the input.\n", + parameters = { + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + }) + }) + public RegexMatcher matcher(String input) { + return new RegexMatcher(pattern.matcher(input), this); + } + + @StarlarkMethod( + name = "split", + doc = "", + parameters = { + @Param( + name = "input", + allowedTypes = { + @ParamType(type = String.class), + }), + @Param( + name = "limit", + allowedTypes = { + @ParamType(type = StarlarkInt.class) + }, + defaultValue = "0" + ) + }) + public StarlarkList split(String input, StarlarkInt limit) { + Object[] strings = _py_re_split_impl(input, limit.toIntUnchecked()); + return 
StarlarkList.immutableCopyOf(Arrays.asList(strings)); + } + + private String[] _jdk_split_impl(CharSequence input, int limit) { + ArrayList matchList = new ArrayList<>(); + Matcher m = pattern.matcher(input); + + int index = 0; + boolean matchLimited = limit > 0; + // Add segments before each match found + while (m.find()) { + if (!matchLimited || matchList.size() < limit - 1) { + if (index == 0 && index == m.start() && m.start() == m.end()) { + // no empty leading substring included for zero-width match + // at the beginning of the input char sequence. + continue; + } + String match = input.subSequence(index, m.start()).toString(); + matchList.add(match); + index = m.end(); + } else if (matchList.size() == limit - 1) { // last one + String match = input.subSequence(index, + input.length()).toString(); + matchList.add(match); + index = m.end(); + + } + } + // If no match was found, return this + if (index == 0) { + return new String[]{input.toString()}; + } + if (!matchLimited || matchList.size() < limit) { + // Add remaining segment + matchList.add(input.subSequence(index, input.length()).toString()); + } + // Construct result + int resultSize = matchList.size(); + if (limit == 0) { + while (resultSize > 0 && matchList.get(resultSize - 1).equals("")) { + resultSize--; + } + } + String[] result = new String[resultSize]; + return matchList.subList(0, resultSize).toArray(result); + } + + private Object[] _py_re_split_impl(CharSequence input, int limit) { + Matcher m = pattern.matcher(input); + ArrayList matchList = new ArrayList<>(); + boolean matchLimited = limit > 0; + boolean has_capture = m.groupCount() > 0; + int index = 0; + String match; + + while (m.find()) { + if (!matchLimited || matchList.size() <= limit - 1) { + match = input.subSequence(index, m.start()).toString(); + matchList.add(match); + index = m.end(); + } else if (matchList.size() == limit - 1) { // last one + match = input.subSequence(index, + input.length()).toString(); + matchList.add(match); + 
index = m.end(); + } + if (has_capture) { + // Check if there's capture groups and add them + for (int i = 0; i < m.groupCount(); ++i) { + match = m.group(i + 1); + matchList.add(match == null ? Starlark.NONE : match); + } + } + } + + // If no match was found, return this + if (index == 0) { + return new String[]{input.toString()}; + } + // NOTE: If maxsplit is nonzero, at most maxsplit splits occur, + // and the remainder of the string is returned as the final + // element of the list. + if (!matchLimited || matchList.size() <= limit) { + // Add remaining segment + matchList.add(input.subSequence(index, input.length()).toString()); + } + + return matchList.toArray(new Object[0]); + } + + @StarlarkMethod( + name = "group_count", + doc = "Returns the number of subgroups in this pattern.\n" + + "the number of subgroups; the overall match (group 0) does not count\n" + ) + public StarlarkInt groupCount() { + return StarlarkInt.of(pattern.groupCount()); + } + +// @StarlarkMethod( +// name = "findall", +// doc = "Return a list of all non-overlapping matches in the string.\n" + +// "\n" + +// "If one or more capturing groups are present in the pattern, return\n" + +// "a list of groups; this will be a list of tuples if the pattern\n" + +// "has more than one group.\n" + +// "\n" + +// "Empty matches are included in the result.", +// parameters = { +// @Param(name = "input", allowedTypes = {@ParamType(type = String.class)}) +// } +// ) +// public StarlarkList findall(String input) { +// +// } + +} diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/test/LarkyAssertions.java b/larky/src/main/java/com/verygood/security/larky/modules/testing/AssertionsModule.java similarity index 92% rename from larky/src/main/java/com/verygood/security/larky/nativelib/test/LarkyAssertions.java rename to larky/src/main/java/com/verygood/security/larky/modules/testing/AssertionsModule.java index 06a57d032..62057869c 100644 --- 
a/larky/src/main/java/com/verygood/security/larky/nativelib/test/LarkyAssertions.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/testing/AssertionsModule.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.nativelib.test; +package com.verygood.security.larky.modules.testing; import net.starlark.java.annot.Param; import net.starlark.java.annot.StarlarkBuiltin; @@ -16,9 +16,9 @@ name = "asserts", category = "BUILTIN", doc = "This module implements a ") -public class LarkyAssertions implements StarlarkValue { +public class AssertionsModule implements StarlarkValue { - public static final LarkyAssertions INSTANCE = new LarkyAssertions(); + public static final AssertionsModule INSTANCE = new AssertionsModule(); public interface Reporter { /** diff --git a/larky/src/main/java/com/verygood/security/larky/modules/testing/README.md b/larky/src/main/java/com/verygood/security/larky/modules/testing/README.md new file mode 100644 index 000000000..e13ba9355 --- /dev/null +++ b/larky/src/main/java/com/verygood/security/larky/modules/testing/README.md @@ -0,0 +1,5 @@ +# Testing Modules + +These modules expose testing native modules that are **NOT SAFE** to use in normal Larky embeds. + +As a result, this namespace is only exposed via the LarkyTest suites. 
\ No newline at end of file diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java b/larky/src/main/java/com/verygood/security/larky/modules/testing/UnittestModule.java similarity index 94% rename from larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java rename to larky/src/main/java/com/verygood/security/larky/modules/testing/UnittestModule.java index 04ecd2909..a9a9670ee 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/test/UnittestModule.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/testing/UnittestModule.java @@ -1,9 +1,10 @@ -package com.verygood.security.larky.nativelib.test; +package com.verygood.security.larky.modules.testing; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterators; -import com.google.common.io.ByteStreams; + +import com.verygood.security.larky.utils.NullPrintStream; import junit.framework.Test; import junit.framework.TestCase; @@ -21,7 +22,6 @@ import net.starlark.java.eval.StarlarkThread; import net.starlark.java.eval.StarlarkValue; -import java.io.PrintStream; import java.util.Iterator; import lombok.Getter; import lombok.Setter; @@ -108,13 +108,6 @@ public Object createTestRunner(StarlarkThread thread) { return new LarkyTestRunner(); } - static final class NullPrintStream extends PrintStream { - @SuppressWarnings("UnstableApiUsage") - public NullPrintStream() { - super(ByteStreams.nullOutputStream()); - } - } - public static class LarkyTestRunner extends TestRunner implements StarlarkValue { public LarkyTestRunner() { //super(System.out); diff --git a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/Partial.java b/larky/src/main/java/com/verygood/security/larky/modules/types/Partial.java similarity index 97% rename from larky/src/main/java/com/verygood/security/larky/stdtypes/structs/Partial.java rename to 
larky/src/main/java/com/verygood/security/larky/modules/types/Partial.java index a37655dfd..b4d2ee8c2 100644 --- a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/Partial.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/types/Partial.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.stdtypes.structs; +package com.verygood.security.larky.modules.types; import net.starlark.java.eval.Dict; import net.starlark.java.eval.EvalException; diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/LarkyProperty.java b/larky/src/main/java/com/verygood/security/larky/modules/types/Property.java similarity index 87% rename from larky/src/main/java/com/verygood/security/larky/nativelib/LarkyProperty.java rename to larky/src/main/java/com/verygood/security/larky/modules/types/Property.java index 9c70342fd..2177c1548 100644 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/LarkyProperty.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/types/Property.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.nativelib; +package com.verygood.security.larky.modules.types; import com.verygood.security.larky.parser.StarlarkUtil; @@ -20,7 +20,7 @@ import lombok.Builder; @Builder -public class LarkyProperty implements StarlarkValue { +public class Property implements StarlarkValue { final private StarlarkCallable fget; final private StarlarkCallable fset; @@ -46,21 +46,6 @@ public Object set(Object val, String fieldName) throws InterruptedException, Eva return Starlark.call(this.thread, this.fset, Tuple.of(val), Dict.empty()); } -// @StarlarkMethod( -// name = "descriptor", -// doc = "call", -// parameters = { -// @Param( -// name = "callable" -// ) -// }, -// useStarlarkThread = true, -// selfCall = true -// ) -// public LarkyProperty create(Object callable, StarlarkThread thread) throws EvalException { -// return new LarkyDescriptorImpl((StarlarkCallable) callable, thread); -// } - public Object 
call() throws NoSuchMethodException, EvalException { return call(null, null); } diff --git a/larky/src/main/java/com/verygood/security/larky/stdtypes/hashing/HashObject.java b/larky/src/main/java/com/verygood/security/larky/modules/types/hashing/HashObject.java similarity index 96% rename from larky/src/main/java/com/verygood/security/larky/stdtypes/hashing/HashObject.java rename to larky/src/main/java/com/verygood/security/larky/modules/types/hashing/HashObject.java index c6bfbc438..ccc70ff40 100644 --- a/larky/src/main/java/com/verygood/security/larky/stdtypes/hashing/HashObject.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/types/hashing/HashObject.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.stdtypes.hashing; +package com.verygood.security.larky.modules.types.hashing; import net.starlark.java.annot.StarlarkMethod; import net.starlark.java.eval.StarlarkValue; diff --git a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/ImmutableStruct.java b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/ImmutableStruct.java similarity index 74% rename from larky/src/main/java/com/verygood/security/larky/stdtypes/structs/ImmutableStruct.java rename to larky/src/main/java/com/verygood/security/larky/modules/types/structs/ImmutableStruct.java index c73dca0f2..7e3d4f484 100644 --- a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/ImmutableStruct.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/ImmutableStruct.java @@ -1,4 +1,4 @@ -package com.verygood.security.larky.stdtypes.structs; +package com.verygood.security.larky.modules.types.structs; import com.google.common.collect.ImmutableMap; diff --git a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/MutableStruct.java b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/MutableStruct.java similarity index 77% rename from 
larky/src/main/java/com/verygood/security/larky/stdtypes/structs/MutableStruct.java rename to larky/src/main/java/com/verygood/security/larky/modules/types/structs/MutableStruct.java index 24b87af64..8d04876a7 100644 --- a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/MutableStruct.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/MutableStruct.java @@ -1,8 +1,8 @@ -package com.verygood.security.larky.stdtypes.structs; +package com.verygood.security.larky.modules.types.structs; import com.google.common.base.Joiner; -import com.verygood.security.larky.nativelib.LarkyProperty; +import com.verygood.security.larky.modules.types.Property; import net.starlark.java.eval.Dict; import net.starlark.java.eval.EvalException; @@ -19,12 +19,12 @@ public Object getValue(String name) { Object field = super.getValue(name); /* if we have assigned a field that is a descriptor, we can invoke it */ if (field == null - || !LarkyProperty.class.isAssignableFrom(field.getClass())) { + || !Property.class.isAssignableFrom(field.getClass())) { return field; } try { - return ((LarkyProperty) field).call(); + return ((Property) field).call(); } catch ( NoSuchMethodException | EvalException exception) { @@ -37,13 +37,13 @@ public void setField(String name, Object value) throws EvalException { Object field = this.fields.get(name); /* if we have assigned a field that is a descriptor, we can invoke it */ if (field == null - || !LarkyProperty.class.isAssignableFrom(field.getClass())) { + || !Property.class.isAssignableFrom(field.getClass())) { ((Dict) fields).putEntry(name, value); return; } try { - ((LarkyProperty) field).call(new Object[]{value, name}, null); + ((Property) field).call(new Object[]{value, name}, null); } catch (NoSuchMethodException exception) { throw new RuntimeException(exception); } diff --git a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/SimpleStruct.java 
b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/SimpleStruct.java similarity index 52% rename from larky/src/main/java/com/verygood/security/larky/stdtypes/structs/SimpleStruct.java rename to larky/src/main/java/com/verygood/security/larky/modules/types/structs/SimpleStruct.java index a6531693a..f4d58e0c2 100644 --- a/larky/src/main/java/com/verygood/security/larky/stdtypes/structs/SimpleStruct.java +++ b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/SimpleStruct.java @@ -1,15 +1,12 @@ -package com.verygood.security.larky.stdtypes.structs; +package com.verygood.security.larky.modules.types.structs; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import net.starlark.java.annot.StarlarkMethod; import net.starlark.java.eval.Dict; import net.starlark.java.eval.Printer; import net.starlark.java.eval.Starlark; -import net.starlark.java.eval.StarlarkCallable; import net.starlark.java.eval.StarlarkSemantics; -import net.starlark.java.eval.StarlarkThread; import net.starlark.java.eval.Structure; import java.util.Map; @@ -76,55 +73,4 @@ public void repr(Printer p) { p.append(")"); } - // SimpleStructWithMethods augments SimpleStruct's fields with annotated Java methods. - private static final class SimpleStructWithMethods extends SimpleStruct { - - // A function that returns "fromValues". 
- private static final Object returnFromValues = - new StarlarkCallable() { - @Override - public String getName() { - return "returnFromValues"; - } - - @Override - public Object fastcall(StarlarkThread thread, Object[] positional, Object[] named) { - return "bar"; - } - }; - - SimpleStructWithMethods() { - super( - ImmutableMap.of( - "values_only_field", - "fromValues", - "values_only_method", - returnFromValues, - "collision_field", - "fromValues", - "collision_method", - returnFromValues)); - } - - @StarlarkMethod(name = "callable_only_field", documented = false, structField = true) - public String getCallableOnlyField() { - return "fromStarlarkMethod"; - } - - @StarlarkMethod(name = "callable_only_method", documented = false, structField = false) - public String getCallableOnlyMethod() { - return "fromStarlarkMethod"; - } - - @StarlarkMethod(name = "collision_field", documented = false, structField = true) - public String getCollisionField() { - return "fromStarlarkMethod"; - } - - @StarlarkMethod(name = "collision_method", documented = false, structField = false) - public String getCollisionMethod() { - return "fromStarlarkMethod"; - } - } - } diff --git a/larky/src/main/java/com/verygood/security/larky/modules/types/structs/SimpleStructWithMethods.java b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/SimpleStructWithMethods.java new file mode 100644 index 000000000..fc3eb876a --- /dev/null +++ b/larky/src/main/java/com/verygood/security/larky/modules/types/structs/SimpleStructWithMethods.java @@ -0,0 +1,58 @@ +package com.verygood.security.larky.modules.types.structs; + +import com.google.common.collect.ImmutableMap; + +import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.StarlarkCallable; +import net.starlark.java.eval.StarlarkThread; + +// SimpleStructWithMethods augments SimpleStruct's fields with annotated Java methods. 
+final class SimpleStructWithMethods extends SimpleStruct { + + // A function that returns "fromValues". + private static final Object returnFromValues = + new StarlarkCallable() { + @Override + public String getName() { + return "returnFromValues"; + } + + @Override + public Object fastcall(StarlarkThread thread, Object[] positional, Object[] named) { + return "bar"; + } + }; + + SimpleStructWithMethods() { + super( + ImmutableMap.of( + "values_only_field", + "fromValues", + "values_only_method", + returnFromValues, + "collision_field", + "fromValues", + "collision_method", + returnFromValues)); + } + + @StarlarkMethod(name = "callable_only_field", documented = false, structField = true) + public String getCallableOnlyField() { + return "fromStarlarkMethod"; + } + + @StarlarkMethod(name = "callable_only_method", documented = false, structField = false) + public String getCallableOnlyMethod() { + return "fromStarlarkMethod"; + } + + @StarlarkMethod(name = "collision_field", documented = false, structField = true) + public String getCollisionField() { + return "fromStarlarkMethod"; + } + + @StarlarkMethod(name = "collision_method", documented = false, structField = false) + public String getCollisionMethod() { + return "fromStarlarkMethod"; + } +} diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java b/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java deleted file mode 100644 index df786780f..000000000 --- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/RE2RegexEngine.java +++ /dev/null @@ -1,581 +0,0 @@ -package com.verygood.security.larky.nativelib.std; - -import com.google.common.base.Joiner; -import com.google.re2j.Matcher; -import com.google.re2j.Pattern; - -import com.verygood.security.larky.parser.StarlarkUtil; - -import net.starlark.java.annot.Param; -import net.starlark.java.annot.ParamType; -import net.starlark.java.annot.StarlarkBuiltin; -import 
net.starlark.java.annot.StarlarkMethod; -import net.starlark.java.eval.EvalException; -import net.starlark.java.eval.NoneType; -import net.starlark.java.eval.Starlark; -import net.starlark.java.eval.StarlarkInt; -import net.starlark.java.eval.StarlarkList; -import net.starlark.java.eval.StarlarkValue; - -import java.util.ArrayList; -import java.util.Arrays; - - -@StarlarkBuiltin( - name = "re2j", - category = "BUILTIN", - doc = "This module provides access to the linear regular expression matching engine.\n" + - "\n" + - "This package provides an implementation of regular expression matching based on Russ Cox's linear-time RE2 algorithm.\n" + - "\n" + - "The API presented by com.google.re2j mimics that of java.util.regex.Matcher and java.util.regex.Pattern. While not identical, they are similar enough that most users can switch implementations simply by changing their imports.\n" + - "\n" + - "The syntax of the regular expressions accepted is the same general syntax used by Perl, Python, and other languages. More precisely, it is the syntax accepted by the C++ and Go implementations of RE2 described at https://github.com/google/re2/wiki/Syntax, except for \\C (match any byte), which is not supported because in this implementation, the matcher's input is conceptually a stream of Unicode code points, not bytes.\n" + - "\n" + - "The current API is rather small and intended for compatibility with java.util.regex, but the underlying implementation supports some additional features, such as the ability to process input character streams encoded as UTF-8 byte arrays. These may be exposed in a future release if there is sufficient interest." 
+ - "\n" + - "More on syntax here: https://github.com/google/re2/wiki/Syntax") -public class RE2RegexEngine implements StarlarkValue { - - public static final RE2RegexEngine INSTANCE = new RE2RegexEngine(); - - private static final LarkyRegexPattern _Pattern = new LarkyRegexPattern(); - - @StarlarkMethod(name = "Pattern", doc = "pattern", structField = true) - public static LarkyRegexPattern Pattern() { return _Pattern; } - - // java <> larky objects - public static class LarkyRegexPattern implements StarlarkValue { - - @StarlarkMethod(name = "CASE_INSENSITIVE", doc = "Flag: case insensitive matching.", structField = true) - public StarlarkInt CASE_INSENSITIVE() { return StarlarkInt.of(Pattern.CASE_INSENSITIVE); } - - @StarlarkMethod(name = "DISABLE_UNICODE_GROUPS", doc = "Flag: Unicode groups (e.g. \\p\\ Greek\\ ) will be syntax errors", structField = true) - public StarlarkInt DISABLE_UNICODE_GROUPS() { return StarlarkInt.of(Pattern.DISABLE_UNICODE_GROUPS); } - - @StarlarkMethod(name = "DOTALL", doc = "Flag: dot (.) matches all characters, including newline.", structField = true) - public StarlarkInt DOTALL() { return StarlarkInt.of(Pattern.DOTALL); } - - @StarlarkMethod(name = "LONGEST_MATCH", doc = "Flag: matches longest possible string.", structField = true) - public StarlarkInt LONGEST_MATCH() { return StarlarkInt.of(Pattern.LONGEST_MATCH); } - - @StarlarkMethod(name = "MULTILINE", doc = "Flag: multiline matching: ^ and $ match at beginning and end of line, not just beginning and end of input.", structField = true) - public StarlarkInt MULTILINE() { return StarlarkInt.of(Pattern.MULTILINE); } - - private Pattern pattern; - - protected LarkyRegexPattern pattern(Pattern pattern) { - this.pattern = pattern; - return this; - } - - @StarlarkMethod( - name = "compile", - doc = "Creates and returns a new Pattern corresponding to compiling regex with the given flags." 
+ - "If flags is not passed, it defaults to 0", - parameters = { - @Param(name = "regex"), - @Param( - name = "flags", - allowedTypes = { - @ParamType(type = StarlarkInt.class), - }, - defaultValue = "0") - }) - public static LarkyRegexPattern compile(String regex, StarlarkInt flags) { - int flag = flags.toIntUnchecked(); - return new LarkyRegexPattern().pattern(Pattern.compile(regex, flag)); - } - - @StarlarkMethod( - name = "matches", - doc = "Matches a string against a regular expression.", - parameters = { - @Param(name = "regex"), - @Param( - name = "input", - allowedTypes = { - @ParamType(type = String.class), - }) - }) - public static boolean matches(String regex, String input) { - return Pattern.matches(regex, input); - } - - @StarlarkMethod( - name = "quote", - doc = "", - parameters = { - @Param( - name = "s", - allowedTypes = { - @ParamType(type = String.class), - }) - }) - public static String quote(String s) { - return Pattern.quote(s); - } - - @StarlarkMethod( - name = "flags", - doc = "" - ) - public StarlarkInt flags() { - return StarlarkInt.of(pattern.flags()); - } - - @StarlarkMethod(name="pattern", doc="") - public String pattern() { - return pattern.pattern(); - } - - @StarlarkMethod( - name = "matcher", - doc = "Creates a new Matcher matching the pattern against the input.\n", - parameters = { - @Param( - name = "input", - allowedTypes = { - @ParamType(type = String.class), - }) - }) - public LarkyRegexMatcher matcher(String input) { - return new LarkyRegexMatcher(pattern.matcher(input), this); - } - - @StarlarkMethod( - name = "split", - doc = "", - parameters = { - @Param( - name = "input", - allowedTypes = { - @ParamType(type = String.class), - }), - @Param( - name = "limit", - allowedTypes = { - @ParamType(type = StarlarkInt.class) - }, - defaultValue = "0" - ) - }) - public StarlarkList split(String input, StarlarkInt limit) { - Object[] strings = _py_re_split_impl(input, limit.toIntUnchecked()); - return 
StarlarkList.immutableCopyOf(Arrays.asList(strings)); - } - - private String[] _jdk_split_impl(CharSequence input, int limit) { - ArrayList matchList = new ArrayList<>(); - Matcher m = pattern.matcher(input); - - int index = 0; - boolean matchLimited = limit > 0; - // Add segments before each match found - while (m.find()) { - if (!matchLimited || matchList.size() < limit - 1) { - if (index == 0 && index == m.start() && m.start() == m.end()) { - // no empty leading substring included for zero-width match - // at the beginning of the input char sequence. - continue; - } - String match = input.subSequence(index, m.start()).toString(); - matchList.add(match); - index = m.end(); - } else if (matchList.size() == limit - 1) { // last one - String match = input.subSequence(index, - input.length()).toString(); - matchList.add(match); - index = m.end(); - - } - } - // If no match was found, return this - if (index == 0) { - return new String[]{input.toString()}; - } - if (!matchLimited || matchList.size() < limit) { - // Add remaining segment - matchList.add(input.subSequence(index, input.length()).toString()); - } - // Construct result - int resultSize = matchList.size(); - if (limit == 0) { - while (resultSize > 0 && matchList.get(resultSize - 1).equals("")) { - resultSize--; - } - } - String[] result = new String[resultSize]; - return matchList.subList(0, resultSize).toArray(result); - } - - private Object[] _py_re_split_impl(CharSequence input, int limit) { - Matcher m = pattern.matcher(input); - ArrayList matchList = new ArrayList<>(); - boolean matchLimited = limit > 0; - boolean has_capture = m.groupCount() > 0; - int index = 0; - String match; - - while(m.find()) { - if (!matchLimited || matchList.size() <= limit - 1) { - match = input.subSequence(index, m.start()).toString(); - matchList.add(match); - index = m.end(); - } else if (matchList.size() == limit - 1) { // last one - match = input.subSequence(index, - input.length()).toString(); - matchList.add(match); - 
index = m.end(); - } - if(has_capture) { - // Check if there's capture groups and add them - for(int i = 0; i < m.groupCount(); ++i) { - match = m.group(i+1); - matchList.add(match == null ? Starlark.NONE : match); - } - } - } - - // If no match was found, return this - if (index == 0) { - return new String[] {input.toString()}; - } - // NOTE: If maxsplit is nonzero, at most maxsplit splits occur, - // and the remainder of the string is returned as the final - // element of the list. - if (!matchLimited || matchList.size() <= limit) { - // Add remaining segment - matchList.add(input.subSequence(index, input.length()).toString()); - } - - return matchList.toArray(new Object[0]); - } - - @StarlarkMethod( - name = "group_count", - doc = "Returns the number of subgroups in this pattern.\n" + - "the number of subgroups; the overall match (group 0) does not count\n" - ) - public StarlarkInt groupCount() { - return StarlarkInt.of(pattern.groupCount()); - } - -// @StarlarkMethod( -// name = "findall", -// doc = "Return a list of all non-overlapping matches in the string.\n" + -// "\n" + -// "If one or more capturing groups are present in the pattern, return\n" + -// "a list of groups; this will be a list of tuples if the pattern\n" + -// "has more than one group.\n" + -// "\n" + -// "Empty matches are included in the result.", -// parameters = { -// @Param(name = "input", allowedTypes = {@ParamType(type = String.class)}) -// } -// ) -// public StarlarkList findall(String input) { -// -// } - - } - - public static class LarkyRegexMatcher implements StarlarkValue { - private final Matcher matcher; - private final LarkyRegexPattern pattern; - - LarkyRegexMatcher(Matcher matcher) { - this.matcher = matcher; - this.pattern = new LarkyRegexPattern().pattern(matcher.pattern()); - } - - LarkyRegexMatcher(Matcher matcher, LarkyRegexPattern pattern) { - this.matcher = matcher; - this.pattern = pattern; - } - - @StarlarkMethod( - name = "pattern", - doc = "Returns the 
LarkyRegexPattern associated with this LarkyRegexMatcher.\n" - ) - public LarkyRegexPattern pattern() { - return pattern; - } - - @StarlarkMethod( - name = "reset", - doc = "Resets the LarkyRegexMatcher, rewinding input and discarding any match information.\n", - parameters = { - @Param( - name = "input", - allowedTypes = { - @ParamType(type = String.class), - @ParamType(type = NoneType.class) - }, - defaultValue = "None" - ) - } - ) - public LarkyRegexMatcher reset(Object input) { - if(NoneType.class.isAssignableFrom(input.getClass())) { - matcher.reset(); - } - else if(String.class.isAssignableFrom(input.getClass())) { - matcher.reset(String.valueOf(input)); - } - return this; - } - - @StarlarkMethod( - name = "start", - doc = "Returns the start position of the most recent match." + - "\n" + - "Accepts a group index position, or defaults to 0 if it's the overall match.", - parameters = { - @Param( - name = "index", - allowedTypes = { - @ParamType(type = StarlarkInt.class), - }, - defaultValue = "0" - ) - } - ) - public StarlarkInt start(StarlarkInt index) { - return StarlarkInt.of(matcher.start(index.toIntUnchecked())); - } - @StarlarkMethod( - name = "end", - doc = "Returns the end position of the most recent match." + - "\n" + - "Accepts a group index position, or defaults to 0 if it's the overall match.", - parameters = { - @Param( - name = "index", - allowedTypes = { - @ParamType(type = StarlarkInt.class), - }, - defaultValue = "0" - ) - } - ) - public StarlarkInt end(StarlarkInt index) { - return StarlarkInt.of(matcher.end(index.toIntUnchecked())); - } - - @StarlarkMethod( - name = "group", - doc = "Returns the most recent match." + - "\n" + - "If no argument or None is passed in, returns the most recent match, or " + - "null if the group was not matched." + - "\n" + - "If a valid integer is returned, returns the subgroup of the most recent match." 
+ - "\n" + - "Throws an exception if group < 0 or group > group_count()", - parameters = { - @Param( - name = "group", - allowedTypes = { - @ParamType(type = StarlarkInt.class), - @ParamType(type = String.class), - @ParamType(type = NoneType.class), - }, - defaultValue = "None") - }) - public Object group(Object group) { - String g; - if(Starlark.isNullOrNone(group)) { - g = matcher.group(); - } - else if(StarlarkInt.class.isAssignableFrom(group.getClass())) { - g = matcher.group(((StarlarkInt)group).toIntUnchecked()); - } - // default case - else { - g = matcher.group(String.valueOf(group)); - } - - if(g == null) { - return Starlark.NONE; - } - return g; - - } - @StarlarkMethod( - name = "group_count", - doc = "Returns the number of subgroups in this pattern.\n" + - "the number of subgroups; the overall match (group 0) does not count\n" - ) - public StarlarkInt groupCount() { - return StarlarkInt.of(matcher.groupCount()); - } - - @StarlarkMethod( - name = "matches", - doc = "Matches the entire input against the pattern (anchored start and end). " + - "If there is a match, matches sets the match state to describe it.\n" + - "the number of subgroups; the overall match (group 0) does not count\n" + - "\n" + - "Returns: true if the entire input matches the pattern" - ) - public boolean matches() { - return matcher.matches(); - } - - @StarlarkMethod( - name = "looking_at", - doc = "Matches the beginning of input against the pattern (anchored start). " + - "If there is a match, looking_at sets the match state to describe it." + - "\n" + - "Returns true if the beginning of the input matches the pattern\n" - ) - public boolean lookingAt() { - return matcher.lookingAt(); - } - - @StarlarkMethod( - name = "find", - doc = "Matches the input against the pattern (unanchored), starting at a specified position." + - " If there is a match, find sets the match state to describe it." 
+ - "\n" + - "start - the input position where the search begins\n" + - "\n" + - "Returns true if it finds a match or throw if start is not a valid input position\n", - parameters = { - @Param( - name = "start", - allowedTypes = { - @ParamType(type = StarlarkInt.class), - @ParamType(type = NoneType.class), - }, - defaultValue = "None" - ) - } - ) - public boolean find(Object start) { - if(Starlark.isNullOrNone(start)) { - return matcher.find(); - } - StarlarkInt s = (StarlarkInt) StarlarkUtil.valueToStarlark(start); - return matcher.find(s.toIntUnchecked()); - } - - @StarlarkMethod( - name="quote_replacement", - doc = "Quotes '\\' and '$' in s, so that the returned string could be used in " + - "append_replacement(appendable_string, s) as a literal replacement of s.\n" + - "\n" + - "Returns: the quoted string", - parameters = { - @Param( - name = "s", - allowedTypes = { - @ParamType(type = String.class), - } - ) - } - ) - public static String quoteReplacement(String s) { - return Matcher.quoteReplacement(s); - } - - @StarlarkMethod( - name="append_replacement", - doc = "Appends to sb two strings: the text from the append position up to the " + - "beginning of the most recent match, and then the replacement with submatch groups" + - " substituted for references of the form $n, where n is the group number in decimal" + - ". It advances the append position to where the most recent match ended." + - "\n" + - "To embed a literal $, use \\$ (actually \"\\\\$\" with string escapes). The " + - "escape is only necessary when $ is followed by a digit, but it is always allowed. " + - "Only $ and \\ need escaping, but any character can be escaped." + - "\n" + - "\n" + - "The group number n in $n is always at least one digit and expands to use more " + - "digits as long as the resulting number is a valid group number for this pattern. " + - "To cut it off earlier, escape the first digit that should not be used." 
+ - "\n" + - "Returns: the Matcher itself, for chained method calls\n", - parameters = { - @Param( - name = "sb", - allowedTypes = { - @ParamType(type = StarlarkList.class), - } - ), - @Param( - name = "replacement", - allowedTypes = { - @ParamType(type = String.class), - } - )} - ) - public LarkyRegexMatcher appendReplacement(StarlarkList sb, String replacement) { - StringBuilder builder = new StringBuilder().append(Joiner.on("").join(sb)); - matcher.appendReplacement(builder, replacement); - try { - sb.clearElements(); - sb.addElements(Arrays.asList(builder.toString().split(""))); - } catch (EvalException e) { - throw new RuntimeException(e); - } - return this; - } - - @StarlarkMethod( - name="append_tail", - doc = "Appends to sb the substring of the input from the append position to the " + - "end of the input." + - "\n" + - "Returns the argument sb, for method chaining\n", - parameters = { - @Param( - name = "s", - allowedTypes = { - @ParamType(type = String.class), - } - )} - ) - public String appendTail(String s) { - return matcher.appendTail(new StringBuilder().append(s)).toString(); - } - - @StarlarkMethod( - name="replace_all", - doc = "Returns the input with all matches replaced by replacement, interpreted as for" + - " append_replacement." 
+ - "\n" + - "The input string with the matches replaced\n", - parameters = { - @Param( - name = "replacement", - allowedTypes = { - @ParamType(type = String.class), - } - )} - ) - public String replaceAll(String replacement) { - return matcher.replaceAll(replacement); - } - - @StarlarkMethod( - name="replace_first", - doc = "Returns the input with the first match replaced by replacement, " + - "interpreted as for append_replacement.\n" + - "\n" + - "The input string with the first matches replaced\n", - parameters = { - @Param( - name = "replacement", - allowedTypes = { - @ParamType(type = String.class), - } - )} - ) - public String replaceFirst(String replacement) { - return matcher.replaceFirst(replacement); - } - - } -} diff --git a/larky/src/main/java/com/verygood/security/larky/utils/NullPrintStream.java b/larky/src/main/java/com/verygood/security/larky/utils/NullPrintStream.java new file mode 100644 index 000000000..f244aca38 --- /dev/null +++ b/larky/src/main/java/com/verygood/security/larky/utils/NullPrintStream.java @@ -0,0 +1,12 @@ +package com.verygood.security.larky.utils; + +import com.google.common.io.ByteStreams; + +import java.io.PrintStream; + +public final class NullPrintStream extends PrintStream { + @SuppressWarnings("UnstableApiUsage") + public NullPrintStream() { + super(ByteStreams.nullOutputStream()); + } +} diff --git a/larky/src/main/resources/stdlib/vendor/pycryptodome.star b/larky/src/main/resources/stdlib/vendor/pycryptodome.star new file mode 100644 index 000000000..e69de29bb diff --git a/larky/src/test/java/com/verygood/security/larky/LarkyTest.java b/larky/src/test/java/com/verygood/security/larky/LarkyTest.java index f867acf5d..181144537 100644 --- a/larky/src/test/java/com/verygood/security/larky/LarkyTest.java +++ b/larky/src/test/java/com/verygood/security/larky/LarkyTest.java @@ -4,10 +4,10 @@ import com.google.common.collect.ImmutableSet; +import com.verygood.security.larky.modules.globals.LarkyGlobals; +import 
com.verygood.security.larky.modules.globals.PythonBuiltins; import com.verygood.security.larky.console.testing.TestingConsole; -import com.verygood.security.larky.nativelib.LarkyGlobals; -import com.verygood.security.larky.nativelib.PythonBuiltins; -import com.verygood.security.larky.nativelib.test.UnittestModule; +import com.verygood.security.larky.modules.testing.UnittestModule; import com.verygood.security.larky.parser.LarkyScript; import com.verygood.security.larky.parser.ParsedStarFile; import com.verygood.security.larky.parser.PathBasedStarFile; diff --git a/larky/src/test/java/com/verygood/security/larky/ScriptTest.java b/larky/src/test/java/com/verygood/security/larky/ScriptTest.java index 0a93e5d2d..4873c185d 100644 --- a/larky/src/test/java/com/verygood/security/larky/ScriptTest.java +++ b/larky/src/test/java/com/verygood/security/larky/ScriptTest.java @@ -39,7 +39,7 @@ import net.starlark.java.eval.StarlarkThread; import net.starlark.java.eval.StarlarkValue; import net.starlark.java.eval.Structure; -import com.verygood.security.larky.nativelib.std.Json; +import com.verygood.security.larky.modules.JsonModule; import net.starlark.java.syntax.FileOptions; import net.starlark.java.syntax.ParserInput; import net.starlark.java.syntax.SyntaxError; @@ -173,7 +173,7 @@ public static void main(String[] args) throws Exception { ParserInput input = ParserInput.fromString(buf.toString(), file.toString()); ImmutableMap.Builder predeclared = ImmutableMap.builder(); Starlark.addMethods(predeclared, new ScriptTest()); // e.g. assert_eq - predeclared.put("json", Json.INSTANCE); + predeclared.put("json", JsonModule.INSTANCE); StarlarkSemantics semantics = StarlarkSemantics.DEFAULT; Module module = Module.withPredeclared(semantics, predeclared.build());