diff --git a/larky/pom.xml b/larky/pom.xml
index 64bb2aa40..37d4fa45e 100644
--- a/larky/pom.xml
+++ b/larky/pom.xml
@@ -10,22 +10,6 @@
4.0.0larky
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
- 11
- 11
-
-
-
-
-
-
- true
-
diff --git a/larky/src/main/java/com/verygood/security/larky/Larky.java b/larky/src/main/java/com/verygood/security/larky/Larky.java
index e8f8522d8..01aa4cf45 100644
--- a/larky/src/main/java/com/verygood/security/larky/Larky.java
+++ b/larky/src/main/java/com/verygood/security/larky/Larky.java
@@ -1,6 +1,8 @@
package com.verygood.security.larky;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import com.google.common.annotations.VisibleForTesting;
import net.starlark.java.eval.EvalException;
@@ -17,16 +19,16 @@
import net.starlark.java.syntax.SyntaxError;
import java.io.BufferedReader;
+import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.time.Duration;
-import static java.nio.charset.StandardCharsets.UTF_8;
-
public class Larky {
private static final String START_PROMPT = ">> ";
@@ -143,7 +145,11 @@ static int execute(ParserInput input) {
}
static void writeOutput(String outputFile, StarlarkValue returnValue) throws IOException {
- Files.writeString(Paths.get(outputFile), returnValue.toString(), StandardOpenOption.CREATE);
+ // Files.writeString always encoded as UTF-8; keep that rather than the platform default
+ // charset so the output bytes do not depend on the machine running Larky.
+ // TRUNCATE_EXISTING: with CREATE alone an existing, longer file keeps its stale tail.
+ try (BufferedWriter bw = Files.newBufferedWriter(Paths.get(outputFile),
+ UTF_8,
+ StandardOpenOption.CREATE,
+ StandardOpenOption.TRUNCATE_EXISTING,
+ StandardOpenOption.WRITE)) {
+ bw.write(returnValue.toString());
+ }
}
public static void main(String[] args) throws Exception {
diff --git a/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java b/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java
index bd888c257..63cbdc652 100644
--- a/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java
+++ b/larky/src/main/java/com/verygood/security/larky/ModuleSupplier.java
@@ -20,17 +20,18 @@
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
-import com.verygood.security.larky.nativelib.LarkyGlobals;
-import com.verygood.security.larky.nativelib.PythonBuiltins;
-import com.verygood.security.larky.nativelib.std.C99Math;
-import com.verygood.security.larky.nativelib.std.Hashlib;
+import com.verygood.security.larky.modules.ProtoBufModule;
+import com.verygood.security.larky.modules.globals.LarkyGlobals;
+import com.verygood.security.larky.modules.globals.PythonBuiltins;
+import com.verygood.security.larky.modules.C99MathModule;
+import com.verygood.security.larky.modules.HashModule;
+import com.verygood.security.larky.modules.JsonModule;
+import com.verygood.security.larky.modules.RegexModule;
+import com.verygood.security.larky.modules.testing.AssertionsModule;
+import com.verygood.security.larky.modules.testing.UnittestModule;
import net.starlark.java.annot.StarlarkBuiltin;
import net.starlark.java.eval.StarlarkValue;
-import com.verygood.security.larky.nativelib.std.Json;
-import com.verygood.security.larky.nativelib.std.Proto;
-import com.verygood.security.larky.nativelib.test.LarkyAssertions;
-import com.verygood.security.larky.nativelib.test.UnittestModule;
import java.util.Map;
import java.util.function.Function;
@@ -46,15 +47,16 @@ public class ModuleSupplier {
);
public static final ImmutableSet STD_MODULES = ImmutableSet.of(
- Json.INSTANCE,
- Proto.INSTANCE,
- Hashlib.INSTANCE,
- C99Math.INSTANCE
+ JsonModule.INSTANCE,
+ ProtoBufModule.INSTANCE,
+ HashModule.INSTANCE,
+ C99MathModule.INSTANCE,
+ RegexModule.INSTANCE
);
public static final ImmutableSet TEST_MODULES = ImmutableSet.of(
UnittestModule.INSTANCE,
- LarkyAssertions.INSTANCE
+ AssertionsModule.INSTANCE
);
private final Map environment;
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/C99Math.java b/larky/src/main/java/com/verygood/security/larky/modules/C99MathModule.java
similarity index 95%
rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/C99Math.java
rename to larky/src/main/java/com/verygood/security/larky/modules/C99MathModule.java
index 2d1b7c8e4..88af84970 100644
--- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/C99Math.java
+++ b/larky/src/main/java/com/verygood/security/larky/modules/C99MathModule.java
@@ -1,4 +1,4 @@
-package com.verygood.security.larky.nativelib.std;
+package com.verygood.security.larky.modules;
import com.google.common.math.DoubleMath;
@@ -14,9 +14,9 @@
name = "c99math",
category = "BUILTIN",
doc = "This module provides access to the mathematical functions defined by the C99 standard")
-public class C99Math implements StarlarkValue {
+public class C99MathModule implements StarlarkValue {
- public static final C99Math INSTANCE = new C99Math();
+ public static final C99MathModule INSTANCE = new C99MathModule();
@StarlarkMethod(name = "PI", doc = "a constant pi", structField = true)
public StarlarkFloat PI_CONSTANT() {
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Hashlib.java b/larky/src/main/java/com/verygood/security/larky/modules/HashModule.java
similarity index 89%
rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/Hashlib.java
rename to larky/src/main/java/com/verygood/security/larky/modules/HashModule.java
index 02421321f..f3997fce8 100644
--- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Hashlib.java
+++ b/larky/src/main/java/com/verygood/security/larky/modules/HashModule.java
@@ -1,4 +1,4 @@
-package com.verygood.security.larky.nativelib.std;
+package com.verygood.security.larky.modules;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
@@ -14,9 +14,9 @@
name = "hashlib",
category = "BUILTIN",
doc = "This module implements a common interface to many different secure hash and message digest algorithms.")
-public class Hashlib implements StarlarkValue {
+public class HashModule implements StarlarkValue {
- public static final Hashlib INSTANCE = new Hashlib();
+ public static final HashModule INSTANCE = new HashModule();
@StarlarkMethod(
name = "md5",
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java b/larky/src/main/java/com/verygood/security/larky/modules/JsonModule.java
similarity index 89%
rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java
rename to larky/src/main/java/com/verygood/security/larky/modules/JsonModule.java
index f9cb2deb0..d10fd6d59 100644
--- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Json.java
+++ b/larky/src/main/java/com/verygood/security/larky/modules/JsonModule.java
@@ -12,10 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package com.verygood.security.larky.nativelib.std;
+package com.verygood.security.larky.modules;
-import java.util.Arrays;
-import java.util.Map;
import net.starlark.java.annot.Param;
import net.starlark.java.annot.StarlarkBuiltin;
import net.starlark.java.annot.StarlarkMethod;
@@ -31,25 +29,66 @@
import net.starlark.java.eval.StarlarkValue;
import net.starlark.java.eval.Structure;
+import java.util.Arrays;
+import java.util.Map;
+
// Tests at //src/test/java/net/starlark/java/eval:testdata/json.sky
/**
- * Json defines the Starlark {@code json} module, which provides functions for encoding/decoding
+ * JsonModule defines the Starlark {@code json} module, which provides functions for encoding/decoding
* Starlark values as JSON (https://tools.ietf.org/html/rfc8259).
*/
@StarlarkBuiltin(
name = "json",
category = "core.lib",
doc = "Module json is a Starlark module of JSON-related functions.")
-public final class Json implements StarlarkValue {
-
- private Json() {}
+public final class JsonModule implements StarlarkValue {
+
+ //@formatter:off
+  // Documentation text used by the "dumps" Starlark method. The HTML list markup is part of
+  // the string and must stay inside the literals.
+  private static final String _METHOD_ENCODE_DOCUMENTATION =
+      "<p>The encode function accepts one required positional argument, which it converts to"
+      + " JSON by cases:\n"
+      + "<ul>\n"
+      + "<li>None, True, and False are converted to 'null', 'true', and 'false',"
+      + " respectively.\n"
+      + "<li>An int, no matter how large, is encoded as a decimal integer. Some decoders"
+      + " may not be able to decode very large integers.\n"
+      + "<li>A float is encoded using a decimal point or an exponent or both, even if its"
+      + " numeric value is an integer. It is an error to encode a non-finite "
+      + " floating-point value.\n"
+      + "<li>A string value is encoded as a JSON string literal that denotes the value. "
+      + " Each unpaired surrogate is replaced by U+FFFD.\n"
+      + "<li>A dict is encoded as a JSON object, in key order. It is an error if any key"
+      + " is not a string.\n"
+      + "<li>A list or tuple is encoded as a JSON array.\n"
+      + "<li>A struct-like value is encoded as a JSON object, in field name order.\n"
+      + "</ul>\n"
+      + "An application-defined type may define its own JSON encoding.\n"
+      + "Encoding any other value yields an error.\n";
+ //@formatter:on
+
+ //@formatter:off
+  // Documentation text used by the "loads" Starlark method. The HTML list markup is part of
+  // the string and must stay inside the literals.
+  private static final String _METHOD_DECODE_DOCUMENTATION =
+      "The decode function accepts one positional parameter, a JSON string.\n"
+      + "It returns the Starlark value that the string denotes.\n"
+      + "<ul>"
+      + "<li>'null', 'true', and 'false' are parsed as None, True, and False.\n"
+      + "<li>Numbers are parsed as int, or as a float if they contain"
+      + " a decimal point or an exponent. Although JSON has no syntax "
+      + " for non-finite values, very large values may be decoded as infinity.\n"
+      + "<li>a JSON object is parsed as a new unfrozen Starlark dict."
+      + " Keys must be unique strings.\n"
+      + "<li>a JSON array is parsed as new unfrozen Starlark list.\n"
+      + "</ul>\n"
+      + "Decoding fails if x is not a valid JSON encoding.\n";
+ //@formatter:on
+ private JsonModule() {}
/**
* The module instance. You may wish to add this to your predeclared environment under the name
* "json".
*/
- public static final Json INSTANCE = new Json();
+ public static final JsonModule INSTANCE = new JsonModule();
/** An interface for StarlarkValue subclasses to define their own JSON encoding. */
public interface Encodable {
@@ -100,6 +139,14 @@ public String encode(Object x) throws EvalException {
return enc.out.toString();
}
+  /**
+   * Alias for {@code encode}, exposed to Starlark under the name {@code dumps}.
+   *
+   * @param x the value to encode
+   * @return whatever {@code encode(x)} returns
+   * @throws EvalException propagated unchanged from {@code encode}
+   */
+  @StarlarkMethod(
+      name = "dumps",
+      doc = _METHOD_ENCODE_DOCUMENTATION,
+      parameters = {
+        @Param(name = "x")
+      })
+  public String dumps(Object x) throws EvalException {
+    final String encoded = this.encode(x);
+    return encoded;
+  }
+
private static final class Encoder {
private final StringBuilder out = new StringBuilder();
@@ -297,6 +344,15 @@ public Object decode(String x, StarlarkThread thread) throws EvalException {
return new Decoder(thread.mutability(), x).decode();
}
+  /**
+   * Alias for {@code decode}, exposed to Starlark under the name {@code loads}.
+   *
+   * @param x the string to decode
+   * @param thread the calling Starlark thread, forwarded to {@code decode}
+   * @return whatever {@code decode(x, thread)} returns
+   * @throws EvalException propagated unchanged from {@code decode}
+   */
+  @StarlarkMethod(
+      name = "loads",
+      doc = _METHOD_DECODE_DOCUMENTATION,
+      parameters = {
+        @Param(name = "x")
+      },
+      useStarlarkThread = true)
+  public Object loads(String x, StarlarkThread thread) throws EvalException {
+    final Object decoded = this.decode(x, thread);
+    return decoded;
+  }
+
private static final class Decoder {
// The decoder necessarily makes certain representation choices
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Proto.java b/larky/src/main/java/com/verygood/security/larky/modules/ProtoBufModule.java
similarity index 96%
rename from larky/src/main/java/com/verygood/security/larky/nativelib/std/Proto.java
rename to larky/src/main/java/com/verygood/security/larky/modules/ProtoBufModule.java
index 00dff8952..6cfa1516d 100644
--- a/larky/src/main/java/com/verygood/security/larky/nativelib/std/Proto.java
+++ b/larky/src/main/java/com/verygood/security/larky/modules/ProtoBufModule.java
@@ -1,4 +1,4 @@
-package com.verygood.security.larky.nativelib.std;
+package com.verygood.security.larky.modules;
import net.starlark.java.annot.Param;
import net.starlark.java.annot.StarlarkBuiltin;
@@ -16,7 +16,7 @@
import java.util.Map;
/**
- * Proto defines the "proto" Starlark module of utilities for protocol message processing.
+ * ProtoBufModule defines the "proto" Starlark module of utilities for protocol message processing.
*
* This file is copied currently from Bazel's:
* com.google.devtools.build.lib.packages.StarlarkLibrary
@@ -25,13 +25,13 @@
name = "proto",
category = "BUILTIN",
doc = "A module for protocol message processing.")
-public final class Proto implements StarlarkValue {
+public final class ProtoBufModule implements StarlarkValue {
// Note: in due course this is likely to move to net.starlark.java.lib.proto.
// Do not add functions that would not belong there!
// Functions related to running the protocol compiler belong in proto_common.
- public static final Proto INSTANCE = new Proto();
+ public static final ProtoBufModule INSTANCE = new ProtoBufModule();
@StarlarkMethod(
name = "encode_text",
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/README.md b/larky/src/main/java/com/verygood/security/larky/modules/README.md
similarity index 60%
rename from larky/src/main/java/com/verygood/security/larky/nativelib/README.md
rename to larky/src/main/java/com/verygood/security/larky/modules/README.md
index 6ef22c57c..678626c8a 100644
--- a/larky/src/main/java/com/verygood/security/larky/nativelib/README.md
+++ b/larky/src/main/java/com/verygood/security/larky/modules/README.md
@@ -10,3 +10,25 @@ In order to ensure that Larky is compatible with Python (besides the obvious `lo
As a result, globals should not be accessed directly. Instead, access Larky native functions and methods via the [`Larky` stdlib namespace](https://github.com/verygoodsecurity/starlarky/blob/master/larky/src/main/resources/stdlib/larky.star). Again, Do not access these libraries directly, but access them through Larky StdLib via the [`larky` namespace](https://github.com/verygoodsecurity/starlarky/blob/master/larky/src/main/resources/stdlib/larky.star).
+### How does one emulate a while loop?
+```python
+ while pos <= finish:
+ # do stuff
+```
+
+Emulate it with a bounded `for` loop and an explicit `break`:
+
+```python
+ for _while_ in range(1000): # "while pos <= finish" is the same as:
+ if pos > finish: # for _while_ in range(xxx):
+ break # if pos > finish: break
+```
+
+`range` can be given a larger bound when more iterations are needed; pick one high enough that the `break` condition is always reached first.
+
+### Native Module
+
+Source files for standard library _extension_ modules.
+
+These are *NOT* built-in modules, but are basically extension wrappers that help
+implement the standard library.
\ No newline at end of file
diff --git a/larky/src/main/java/com/verygood/security/larky/modules/RegexModule.java b/larky/src/main/java/com/verygood/security/larky/modules/RegexModule.java
new file mode 100644
index 000000000..85d232d75
--- /dev/null
+++ b/larky/src/main/java/com/verygood/security/larky/modules/RegexModule.java
@@ -0,0 +1,33 @@
+package com.verygood.security.larky.modules;
+
+import com.verygood.security.larky.modules.re.RegexPattern;
+
+import net.starlark.java.annot.StarlarkBuiltin;
+import net.starlark.java.annot.StarlarkMethod;
+import net.starlark.java.eval.StarlarkValue;
+
+
+/**
+ * The {@code re2j} Starlark module. Its only member is the {@code Pattern} struct field, which
+ * hands out a shared {@link RegexPattern} instance.
+ */
+@StarlarkBuiltin(
+    name = "re2j",
+    category = "BUILTIN",
+    doc = "This module provides access to the linear regular expression matching engine.\n" +
+        "\n" +
+        "This package provides an implementation of regular expression matching based on Russ Cox's linear-time RE2 algorithm.\n" +
+        "\n" +
+        "The API presented by com.google.re2j mimics that of java.util.regex.Matcher and java.util.regex.Pattern. While not identical, they are similar enough that most users can switch implementations simply by changing their imports.\n" +
+        "\n" +
+        "The syntax of the regular expressions accepted is the same general syntax used by Perl, Python, and other languages. More precisely, it is the syntax accepted by the C++ and Go implementations of RE2 described at https://github.com/google/re2/wiki/Syntax, except for \\C (match any byte), which is not supported because in this implementation, the matcher's input is conceptually a stream of Unicode code points, not bytes.\n" +
+        "\n" +
+        "The current API is rather small and intended for compatibility with java.util.regex, but the underlying implementation supports some additional features, such as the ability to process input character streams encoded as UTF-8 byte arrays. These may be exposed in a future release if there is sufficient interest.\n" +
+        "\n" +
+        "More on syntax here: https://github.com/google/re2/wiki/Syntax")
+public class RegexModule implements StarlarkValue {
+
+  public static final RegexModule INSTANCE = new RegexModule();
+
+  // Shared instance returned by the Pattern field. It is created without a compiled regex.
+  private static final RegexPattern PATTERN = new RegexPattern();
+
+  /** Returns the shared {@link RegexPattern} instance behind the {@code Pattern} field. */
+  @StarlarkMethod(
+      name = "Pattern",
+      doc = "The Pattern namespace: use Pattern.compile(regex, flags) to build a pattern, " +
+          "Pattern.matches(regex, input) and Pattern.quote(s) as helpers, and the flag " +
+          "constants (e.g. Pattern.CASE_INSENSITIVE) as values for flags.",
+      structField = true)
+  public static RegexPattern Pattern() {
+    return PATTERN;
+  }
+
+}
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/LarkyGlobals.java b/larky/src/main/java/com/verygood/security/larky/modules/globals/LarkyGlobals.java
similarity index 90%
rename from larky/src/main/java/com/verygood/security/larky/nativelib/LarkyGlobals.java
rename to larky/src/main/java/com/verygood/security/larky/modules/globals/LarkyGlobals.java
index 5af7e1af2..3b7c6b514 100644
--- a/larky/src/main/java/com/verygood/security/larky/nativelib/LarkyGlobals.java
+++ b/larky/src/main/java/com/verygood/security/larky/modules/globals/LarkyGlobals.java
@@ -1,9 +1,10 @@
-package com.verygood.security.larky.nativelib;
+package com.verygood.security.larky.modules.globals;
import com.verygood.security.larky.annot.Library;
import com.verygood.security.larky.annot.StarlarkConstructor;
-import com.verygood.security.larky.stdtypes.structs.Partial;
-import com.verygood.security.larky.stdtypes.structs.SimpleStruct;
+import com.verygood.security.larky.modules.types.Property;
+import com.verygood.security.larky.modules.types.Partial;
+import com.verygood.security.larky.modules.types.structs.SimpleStruct;
import net.starlark.java.annot.Param;
import net.starlark.java.annot.ParamType;
@@ -107,8 +108,8 @@ public Partial partial(StarlarkFunction function, Tuple args, Dict kwargs, StarlarkThread thread) {
- return LarkyProperty.builder()
+ public Property property(StarlarkCallable getter, Object setter, Tuple args, Dict kwargs, StarlarkThread thread) {
+ return Property.builder()
.thread(thread)
.fget(getter)
.fset(setter != Starlark.NONE ? (StarlarkCallable) setter : null)
diff --git a/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java b/larky/src/main/java/com/verygood/security/larky/modules/globals/PythonBuiltins.java
similarity index 67%
rename from larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java
rename to larky/src/main/java/com/verygood/security/larky/modules/globals/PythonBuiltins.java
index 4cf71c8dc..5c44120cc 100644
--- a/larky/src/main/java/com/verygood/security/larky/nativelib/PythonBuiltins.java
+++ b/larky/src/main/java/com/verygood/security/larky/modules/globals/PythonBuiltins.java
@@ -1,4 +1,4 @@
-package com.verygood.security.larky.nativelib;
+package com.verygood.security.larky.modules.globals;
import com.verygood.security.larky.annot.Library;
@@ -17,7 +17,7 @@
* A work-in-progress to add methods as we need them.
*
* More here: https://docs.python.org/3/library/functions.html
- *
+ *
* */
@Library
public final class PythonBuiltins {
@@ -63,4 +63,38 @@ public StarlarkInt pow(StarlarkInt base, StarlarkInt exp, Object mod) throws Eva
.modPow(exp.toBigInteger(), ((StarlarkInt) mod).toBigInteger())
);
}
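+// NOTE: the bytes() and chr() drafts below are disabled. The bytes() body does not compile as
+// written (see the incomplete "Stream.of(bytes.)" expression), and the chr() doc string should
+// describe returning the character for an ordinal. Finish or remove them before re-enabling.
+//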
+// @StarlarkMethod(
+// name = "bytes",
+// doc = "immutable array of bytes",
+// parameters = {
+// @Param(
+// name = "sequence",
+// allowedTypes = {
+// @ParamType(type = String.class),
+// }
+// )
+// }
+// )
+// public StarlarkList bytes(String sequence) {
+// byte[] bytes = sequence.getBytes(StandardCharsets.UTF_8);
+//
+// return StarlarkList.immutableOf(Stream.of(bytes.).map((byte[] x) -> Byte.toUnsignedInt(x)).collect();
+// }
+//
+// @StarlarkMethod(
+// name = "chr",
+// doc = "Return ascii ord",
+// parameters = {
+// @Param(
+// name = "ordinal",
+// allowedTypes = {
+// @ParamType(type = StarlarkInt.class),
+// }
+// )
+// }
+// )
+// public String chr(StarlarkInt ordinal) {
+// return String.valueOf((char) ordinal.toIntUnchecked());
+// }
}
diff --git a/larky/src/main/java/com/verygood/security/larky/modules/re/RegexMatcher.java b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexMatcher.java
new file mode 100644
index 000000000..bedf56e14
--- /dev/null
+++ b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexMatcher.java
@@ -0,0 +1,317 @@
+package com.verygood.security.larky.modules.re;
+
+import com.google.common.base.Joiner;
+import com.google.re2j.Matcher;
+
+import com.verygood.security.larky.parser.StarlarkUtil;
+
+import net.starlark.java.annot.Param;
+import net.starlark.java.annot.ParamType;
+import net.starlark.java.annot.StarlarkMethod;
+import net.starlark.java.eval.EvalException;
+import net.starlark.java.eval.NoneType;
+import net.starlark.java.eval.Starlark;
+import net.starlark.java.eval.StarlarkInt;
+import net.starlark.java.eval.StarlarkList;
+import net.starlark.java.eval.StarlarkValue;
+
+import java.util.Arrays;
+
+/**
+ * Starlark-facing wrapper around an RE2/J {@link Matcher}.
+ *
+ * <p>Each {@code @StarlarkMethod} below delegates to the wrapped matcher and converts between
+ * Starlark values and the Java types the matcher expects.
+ */
+public class RegexMatcher implements StarlarkValue {
+  private final Matcher matcher;
+  private final RegexPattern pattern;
+
+  /** Wraps {@code matcher} and derives a fresh {@link RegexPattern} from its pattern. */
+  RegexMatcher(Matcher matcher) {
+    this.matcher = matcher;
+    this.pattern = new RegexPattern().pattern(matcher.pattern());
+  }
+
+  /** Wraps {@code matcher} and keeps the supplied {@link RegexPattern} as its owner. */
+  RegexMatcher(Matcher matcher, RegexPattern pattern) {
+    this.matcher = matcher;
+    this.pattern = pattern;
+  }
+
+  /** Returns the {@link RegexPattern} given to (or derived by) the constructor. */
+  @StarlarkMethod(
+      name = "pattern",
+      doc = "Returns the RegexPattern associated with this RegexMatcher.\n"
+  )
+  public RegexPattern pattern() {
+    return pattern;
+  }
+
+  /**
+   * Resets the wrapped matcher, optionally replacing its input.
+   *
+   * @param input a {@code String} with the new input, or None (or null) to keep the current one
+   * @return this matcher, for chaining
+   */
+  @StarlarkMethod(
+      name = "reset",
+      doc = "Resets the RegexMatcher, rewinding input and discarding any match information.\n",
+      parameters = {
+        @Param(
+            name = "input",
+            allowedTypes = {
+              @ParamType(type = String.class),
+              @ParamType(type = NoneType.class)
+            },
+            defaultValue = "None"
+        )
+      }
+  )
+  public RegexMatcher reset(Object input) {
+    // Same None test as group() and find(); it also tolerates a Java null.
+    if (Starlark.isNullOrNone(input)) {
+      matcher.reset();
+    } else if (input instanceof String) {
+      matcher.reset((String) input);
+    }
+    return this;
+  }
+
+  /** Returns {@code matcher.start(index)} as a Starlark int. */
+  @StarlarkMethod(
+      name = "start",
+      doc = "Returns the start position of the most recent match." +
+          "\n" +
+          "Accepts a group index position, or defaults to 0 if it's the overall match.",
+      parameters = {
+        @Param(
+            name = "index",
+            allowedTypes = {
+              @ParamType(type = StarlarkInt.class),
+            },
+            defaultValue = "0"
+        )
+      }
+  )
+  public StarlarkInt start(StarlarkInt index) {
+    return StarlarkInt.of(matcher.start(index.toIntUnchecked()));
+  }
+
+  /** Returns {@code matcher.end(index)} as a Starlark int. */
+  @StarlarkMethod(
+      name = "end",
+      doc = "Returns the end position of the most recent match." +
+          "\n" +
+          "Accepts a group index position, or defaults to 0 if it's the overall match.",
+      parameters = {
+        @Param(
+            name = "index",
+            allowedTypes = {
+              @ParamType(type = StarlarkInt.class),
+            },
+            defaultValue = "0"
+        )
+      }
+  )
+  public StarlarkInt end(StarlarkInt index) {
+    return StarlarkInt.of(matcher.end(index.toIntUnchecked()));
+  }
+
+  /**
+   * Returns the text of a group from the most recent match.
+   *
+   * @param group None for the whole match, a {@code StarlarkInt} for a numbered group, or any
+   *     other value (a {@code String} per the declared types) for a named group
+   * @return the group text, or {@code Starlark.NONE} when the matcher reports null
+   */
+  @StarlarkMethod(
+      name = "group",
+      doc = "Returns the most recent match." +
+          "\n" +
+          "If no argument or None is passed in, returns the most recent match, or " +
+          "None if the group was not matched." +
+          "\n" +
+          "If an integer is passed in, returns that numbered subgroup of the most recent match." +
+          "\n" +
+          "If a string is passed in, returns the subgroup with that name." +
+          "\n" +
+          "Throws an exception if group < 0 or group > group_count()",
+      parameters = {
+        @Param(
+            name = "group",
+            allowedTypes = {
+              @ParamType(type = StarlarkInt.class),
+              @ParamType(type = String.class),
+              @ParamType(type = NoneType.class),
+            },
+            defaultValue = "None")
+      })
+  public Object group(Object group) {
+    String g;
+    if (Starlark.isNullOrNone(group)) {
+      g = matcher.group();
+    } else if (group instanceof StarlarkInt) {
+      g = matcher.group(((StarlarkInt) group).toIntUnchecked());
+    } else {
+      // Default case: treat the argument as a group name.
+      g = matcher.group(String.valueOf(group));
+    }
+
+    // Starlark has no null; report an unmatched group as None.
+    if (g == null) {
+      return Starlark.NONE;
+    }
+    return g;
+  }
+
+  /** Returns {@code matcher.groupCount()} as a Starlark int. */
+  @StarlarkMethod(
+      name = "group_count",
+      doc = "Returns the number of subgroups in this pattern.\n" +
+          "the number of subgroups; the overall match (group 0) does not count\n"
+  )
+  public StarlarkInt groupCount() {
+    return StarlarkInt.of(matcher.groupCount());
+  }
+
+  /** Delegates to {@code matcher.matches()}. */
+  @StarlarkMethod(
+      name = "matches",
+      doc = "Matches the entire input against the pattern (anchored start and end). " +
+          "If there is a match, matches sets the match state to describe it.\n" +
+          "\n" +
+          "Returns: true if the entire input matches the pattern"
+  )
+  public boolean matches() {
+    return matcher.matches();
+  }
+
+  /** Delegates to {@code matcher.lookingAt()}. */
+  @StarlarkMethod(
+      name = "looking_at",
+      doc = "Matches the beginning of input against the pattern (anchored start). " +
+          "If there is a match, looking_at sets the match state to describe it." +
+          "\n" +
+          "Returns true if the beginning of the input matches the pattern\n"
+  )
+  public boolean lookingAt() {
+    return matcher.lookingAt();
+  }
+
+  /**
+   * Searches for the next match.
+   *
+   * @param start None to call {@code matcher.find()}, or a {@code StarlarkInt} position passed
+   *     to {@code matcher.find(int)}
+   * @return the boolean returned by the wrapped matcher
+   */
+  @StarlarkMethod(
+      name = "find",
+      doc = "Matches the input against the pattern (unanchored), starting at a specified position." +
+          " If there is a match, find sets the match state to describe it." +
+          "\n" +
+          "start - the input position where the search begins; if omitted or None, the " +
+          "search continues from the end of the previous match\n" +
+          "\n" +
+          "Returns true if it finds a match or throw if start is not a valid input position\n",
+      parameters = {
+        @Param(
+            name = "start",
+            allowedTypes = {
+              @ParamType(type = StarlarkInt.class),
+              @ParamType(type = NoneType.class),
+            },
+            defaultValue = "None"
+        )
+      }
+  )
+  public boolean find(Object start) {
+    if (Starlark.isNullOrNone(start)) {
+      return matcher.find();
+    }
+    StarlarkInt s = (StarlarkInt) StarlarkUtil.valueToStarlark(start);
+    return matcher.find(s.toIntUnchecked());
+  }
+
+  /** Delegates to {@code Matcher.quoteReplacement(s)}. */
+  @StarlarkMethod(
+      name = "quote_replacement",
+      doc = "Quotes '\\' and '$' in s, so that the returned string could be used in " +
+          "append_replacement(appendable_string, s) as a literal replacement of s.\n" +
+          "\n" +
+          "Returns: the quoted string",
+      parameters = {
+        @Param(
+            name = "s",
+            allowedTypes = {
+              @ParamType(type = String.class),
+            }
+        )
+      }
+  )
+  public static String quoteReplacement(String s) {
+    return Matcher.quoteReplacement(s);
+  }
+
+  /**
+   * Runs {@code matcher.appendReplacement} against a Starlark list that stands in for a
+   * {@code StringBuilder}.
+   *
+   * <p>The list elements are joined into a buffer, the matcher appends to that buffer, and the
+   * list is then rewritten in place with one single-character string per element.
+   *
+   * @param sb the list acting as the string buffer; it is cleared and refilled
+   * @param replacement the replacement text handed to the matcher
+   * @return this matcher, for chaining
+   * @throws RuntimeException wrapping the {@code EvalException} raised when the list cannot be
+   *     modified
+   */
+  @StarlarkMethod(
+      name = "append_replacement",
+      doc = "Appends to sb two strings: the text from the append position up to the " +
+          "beginning of the most recent match, and then the replacement with submatch groups" +
+          " substituted for references of the form $n, where n is the group number in decimal" +
+          ". It advances the append position to where the most recent match ended." +
+          "\n" +
+          "To embed a literal $, use \\$ (actually \"\\\\$\" with string escapes). The " +
+          "escape is only necessary when $ is followed by a digit, but it is always allowed. " +
+          "Only $ and \\ need escaping, but any character can be escaped." +
+          "\n" +
+          "\n" +
+          "The group number n in $n is always at least one digit and expands to use more " +
+          "digits as long as the resulting number is a valid group number for this pattern. " +
+          "To cut it off earlier, escape the first digit that should not be used." +
+          "\n" +
+          "Returns: the Matcher itself, for chained method calls\n",
+      parameters = {
+        @Param(
+            name = "sb",
+            allowedTypes = {
+              @ParamType(type = StarlarkList.class),
+            }
+        ),
+        @Param(
+            name = "replacement",
+            allowedTypes = {
+              @ParamType(type = String.class),
+            }
+        )}
+  )
+  public RegexMatcher appendReplacement(StarlarkList sb, String replacement) {
+    StringBuilder builder = new StringBuilder().append(Joiner.on("").join(sb));
+    matcher.appendReplacement(builder, replacement);
+    String rebuilt = builder.toString();
+    try {
+      sb.clearElements();
+      // "".split("") yields a single empty string, which would leave a one-element list behind
+      // for an empty buffer; only refill the list when there is text to store.
+      if (!rebuilt.isEmpty()) {
+        sb.addElements(Arrays.asList(rebuilt.split("")));
+      }
+    } catch (EvalException e) {
+      throw new RuntimeException(e);
+    }
+    return this;
+  }
+
+  /**
+   * Returns {@code s} followed by whatever {@code matcher.appendTail} appends.
+   *
+   * @param s the text accumulated so far; the argument itself is not modified
+   * @return a new string holding {@code s} plus the appended tail
+   */
+  @StarlarkMethod(
+      name = "append_tail",
+      doc = "Appends to s the substring of the input from the append position to the " +
+          "end of the input." +
+          "\n" +
+          "Returns the resulting string; s itself is not modified\n",
+      parameters = {
+        @Param(
+            name = "s",
+            allowedTypes = {
+              @ParamType(type = String.class),
+            }
+        )}
+  )
+  public String appendTail(String s) {
+    return matcher.appendTail(new StringBuilder().append(s)).toString();
+  }
+
+  /** Delegates to {@code matcher.replaceAll(replacement)}. */
+  @StarlarkMethod(
+      name = "replace_all",
+      doc = "Returns the input with all matches replaced by replacement, interpreted as for" +
+          " append_replacement." +
+          "\n" +
+          "The input string with the matches replaced\n",
+      parameters = {
+        @Param(
+            name = "replacement",
+            allowedTypes = {
+              @ParamType(type = String.class),
+            }
+        )}
+  )
+  public String replaceAll(String replacement) {
+    return matcher.replaceAll(replacement);
+  }
+
+  /** Delegates to {@code matcher.replaceFirst(replacement)}. */
+  @StarlarkMethod(
+      name = "replace_first",
+      doc = "Returns the input with the first match replaced by replacement, " +
+          "interpreted as for append_replacement.\n" +
+          "\n" +
+          "The input string with the first matches replaced\n",
+      parameters = {
+        @Param(
+            name = "replacement",
+            allowedTypes = {
+              @ParamType(type = String.class),
+            }
+        )}
+  )
+  public String replaceFirst(String replacement) {
+    return matcher.replaceFirst(replacement);
+  }
+
+}
diff --git a/larky/src/main/java/com/verygood/security/larky/modules/re/RegexPattern.java b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexPattern.java
new file mode 100644
index 000000000..b70a93500
--- /dev/null
+++ b/larky/src/main/java/com/verygood/security/larky/modules/re/RegexPattern.java
@@ -0,0 +1,261 @@
+package com.verygood.security.larky.modules.re;
+
+import com.google.re2j.Matcher;
+import com.google.re2j.Pattern;
+
+import net.starlark.java.annot.Param;
+import net.starlark.java.annot.ParamType;
+import net.starlark.java.annot.StarlarkMethod;
+import net.starlark.java.eval.Starlark;
+import net.starlark.java.eval.StarlarkInt;
+import net.starlark.java.eval.StarlarkList;
+import net.starlark.java.eval.StarlarkValue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+// java <> larky objects
+public class RegexPattern implements StarlarkValue {
+
+  // Flag constants, exposed to Starlark as struct fields. Each returns the matching
+  // com.google.re2j.Pattern int constant wrapped in a StarlarkInt.
+
+  @StarlarkMethod(name = "CASE_INSENSITIVE", doc = "Flag: case insensitive matching.", structField = true)
+  public StarlarkInt CASE_INSENSITIVE() {
+    return StarlarkInt.of(Pattern.CASE_INSENSITIVE);
+  }
+
+  @StarlarkMethod(name = "DISABLE_UNICODE_GROUPS", doc = "Flag: Unicode groups (e.g. \\p{Greek}) will be syntax errors", structField = true)
+  public StarlarkInt DISABLE_UNICODE_GROUPS() {
+    return StarlarkInt.of(Pattern.DISABLE_UNICODE_GROUPS);
+  }
+
+  @StarlarkMethod(name = "DOTALL", doc = "Flag: dot (.) matches all characters, including newline.", structField = true)
+  public StarlarkInt DOTALL() {
+    return StarlarkInt.of(Pattern.DOTALL);
+  }
+
+  @StarlarkMethod(name = "LONGEST_MATCH", doc = "Flag: matches longest possible string.", structField = true)
+  public StarlarkInt LONGEST_MATCH() {
+    return StarlarkInt.of(Pattern.LONGEST_MATCH);
+  }
+
+  @StarlarkMethod(name = "MULTILINE", doc = "Flag: multiline matching: ^ and $ match at beginning and end of line, not just beginning and end of input.", structField = true)
+  public StarlarkInt MULTILINE() {
+    return StarlarkInt.of(Pattern.MULTILINE);
+  }
+
+  // The wrapped RE2/J pattern. It stays null until pattern(Pattern) is called, which compile()
+  // does on the instance it returns.
+  private Pattern pattern;
+
+  /** Stores {@code pattern} as the wrapped pattern and returns this wrapper for chaining. */
+  protected RegexPattern pattern(Pattern pattern) {
+    this.pattern = pattern;
+    return this;
+  }
+
+  /**
+   * Compiles {@code regex} with {@code flags} and wraps the result in a new
+   * {@link RegexPattern}.
+   *
+   * @param regex the regular expression source
+   * @param flags the flags passed to {@code Pattern.compile} after conversion to an int
+   * @return a new wrapper holding the compiled pattern
+   */
+  @StarlarkMethod(
+      name = "compile",
+      doc = "Creates and returns a new Pattern corresponding to compiling regex with the given flags. " +
+          "If flags is not passed, it defaults to 0",
+      parameters = {
+        @Param(name = "regex"),
+        @Param(
+            name = "flags",
+            allowedTypes = {
+              @ParamType(type = StarlarkInt.class),
+            },
+            defaultValue = "0")
+      })
+  public static RegexPattern compile(String regex, StarlarkInt flags) {
+    int flag = flags.toIntUnchecked();
+    return new RegexPattern().pattern(Pattern.compile(regex, flag));
+  }
+
+  /**
+   * One-shot helper that hands both arguments to {@code Pattern.matches}.
+   *
+   * @param regex the regular expression source
+   * @param input the text to test
+   * @return the boolean returned by {@code Pattern.matches(regex, input)}
+   */
+  @StarlarkMethod(
+      name = "matches",
+      doc = "Matches a string against a regular expression.",
+      parameters = {
+        @Param(name = "regex"),
+        @Param(name = "input", allowedTypes = {@ParamType(type = String.class)})
+      })
+  public static boolean matches(String regex, String input) {
+    final boolean matched = Pattern.matches(regex, input);
+    return matched;
+  }
+
+  /**
+   * Delegates to {@code Pattern.quote(s)}.
+   *
+   * @param s the text to quote
+   * @return the quoted string returned by {@code Pattern.quote}
+   */
+  @StarlarkMethod(
+      name = "quote",
+      doc = "Returns a literal pattern string for s: a regular expression that matches s " +
+          "exactly, with any metacharacters in s given no special meaning.",
+      parameters = {
+        @Param(
+            name = "s",
+            allowedTypes = {
+              @ParamType(type = String.class),
+            })
+      })
+  public static String quote(String s) {
+    return Pattern.quote(s);
+  }
+
+  /**
+   * Returns the wrapped pattern's flags as a Starlark int.
+   *
+   * @throws NullPointerException if no pattern has been stored on this instance yet
+   */
+  @StarlarkMethod(
+      name = "flags",
+      doc = "Returns the flags this pattern was compiled with."
+  )
+  public StarlarkInt flags() {
+    // The field is only set through pattern(Pattern); fail with an explanation instead of a
+    // bare NullPointerException when it is still unset.
+    java.util.Objects.requireNonNull(
+        pattern, "flags() requires a compiled pattern; call compile(regex) first");
+    return StarlarkInt.of(pattern.flags());
+  }
+
+  /**
+   * Returns the wrapped pattern's source string.
+   *
+   * @throws NullPointerException if no pattern has been stored on this instance yet
+   */
+  @StarlarkMethod(name = "pattern", doc = "Returns the regular expression string this pattern was compiled from.")
+  public String pattern() {
+    // The field is only set through pattern(Pattern); fail with an explanation instead of a
+    // bare NullPointerException when it is still unset.
+    java.util.Objects.requireNonNull(
+        pattern, "pattern() requires a compiled pattern; call compile(regex) first");
+    return pattern.pattern();
+  }
+
+  /**
+   * Creates a {@link RegexMatcher} for {@code input}, tied to this pattern wrapper.
+   *
+   * @param input the text to match against
+   * @return a new matcher wrapper whose pattern is this instance
+   * @throws NullPointerException if no pattern has been stored on this instance yet
+   */
+  @StarlarkMethod(
+      name = "matcher",
+      doc = "Creates a new Matcher matching the pattern against the input.\n",
+      parameters = {
+        @Param(
+            name = "input",
+            allowedTypes = {
+              @ParamType(type = String.class),
+            })
+      })
+  public RegexMatcher matcher(String input) {
+    // The field is only set through pattern(Pattern); fail with an explanation instead of a
+    // bare NullPointerException when it is still unset.
+    java.util.Objects.requireNonNull(
+        pattern, "matcher() requires a compiled pattern; call compile(regex) first");
+    return new RegexMatcher(pattern.matcher(input), this);
+  }
+
+ @StarlarkMethod(
+ name = "split",
+ doc = "",
+ parameters = {
+ @Param(
+ name = "input",
+ allowedTypes = {
+ @ParamType(type = String.class),
+ }),
+ @Param(
+ name = "limit",
+ allowedTypes = {
+ @ParamType(type = StarlarkInt.class)
+ },
+ defaultValue = "0"
+ )
+ })
+ public StarlarkList