Skip to content

Commit

Permalink
Followup to opcode base64_decode (#3288)
Browse files Browse the repository at this point in the history
* alphabet begone in favor of encoding

* unit test various padding and whitespace scenarios

* padding permutations also fail

* "Slicing" --> "Manipulation"

* fix the codegen fail?

* Documenting padding, whitespace, other character behavior
  • Loading branch information
tzaffi authored Dec 16, 2021
1 parent f5bf5ec commit 9a5d5cd
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 79 deletions.
2 changes: 1 addition & 1 deletion cmd/opdoc/tmLanguage.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func buildSyntaxHighlight() *tmLanguage {
// For these, accumulate into allArithmetics,
// and only add to keyword.Patterns later, when all
// have been collected.
case "Arithmetic", "Byte Array Slicing", "Byte Array Arithmetic",
case "Arithmetic", "Byte Array Manipulation", "Byte Array Arithmetic",
"Byte Array Logic", "Inner Transactions":
escape := map[rune]bool{
'*': true,
Expand Down
2 changes: 1 addition & 1 deletion data/transactions/logic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ various sizes.
| `extract_uint16` | pop a byte-array A and integer B. Extract a range of bytes from A starting at B up to but not including B+2, convert bytes as big endian and push the uint64 result. If B+2 is larger than the array length, the program fails |
| `extract_uint32` | pop a byte-array A and integer B. Extract a range of bytes from A starting at B up to but not including B+4, convert bytes as big endian and push the uint64 result. If B+4 is larger than the array length, the program fails |
| `extract_uint64` | pop a byte-array A and integer B. Extract a range of bytes from A starting at B up to but not including B+8, convert bytes as big endian and push the uint64 result. If B+8 is larger than the array length, the program fails |
| `base64_decode e` | decode X which was base64-encoded using _encoding alphabet_ E. Fail if X is not base64 encoded with alphabet E |
| `base64_decode e` | decode X which was base64-encoded using _encoding_ E. Fail if X is not base64 encoded with encoding E |

These opcodes take byte-array values that are interpreted as
big-endian unsigned integers. For mathematical operators, the
Expand Down
2 changes: 1 addition & 1 deletion data/transactions/logic/README_in.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ various sizes.

### Byte Array Manipulation

@@ Byte_Array_Slicing.md @@
@@ Byte_Array_Manipulation.md @@

These opcodes take byte-array values that are interpreted as
big-endian unsigned integers. For mathematical operators, the
Expand Down
6 changes: 3 additions & 3 deletions data/transactions/logic/TEAL_opcodes.md
Original file line number Diff line number Diff line change
Expand Up @@ -856,14 +856,14 @@ When A is a uint64, index 0 is the least significant bit. Setting bit 3 to 1 on

## base64_decode e

- Opcode: 0x5c {uint8 alphabet index}
- Opcode: 0x5c {uint8 encoding index}
- Pops: *... stack*, []byte
- Pushes: []byte
- decode X which was base64-encoded using _encoding alphabet_ E. Fail if X is not base64 encoded with alphabet E
- decode X which was base64-encoded using _encoding_ E. Fail if X is not base64 encoded with encoding E
- **Cost**: 25
- LogicSigVersion >= 6

decodes X using the base64 encoding alphabet E. Specify the alphabet with an immediate arg either as URL and Filename Safe (`URLAlph`) or Standard (`StdAlph`). See <a href="https://rfc-editor.org/rfc/rfc4648.html#section-4">RFC 4648</a> (sections 4 and 5)
Decodes X using the base64 encoding E. Specify the encoding with an immediate arg either as URL and Filename Safe (`URLEncoding`) or Standard (`StdEncoding`). See <a href="https://rfc-editor.org/rfc/rfc4648.html#section-4">RFC 4648</a> (sections 4 and 5). It is assumed that the encoding ends with the exact number of `=` padding characters as required by the RFC. When padding occurs, any unused pad bits in the encoding must be set to zero or the decoding will fail. The special cases of `\n` and `\r` are allowed but completely ignored. An error will result when attempting to decode a string containing a character that is neither in the encoding alphabet nor one of `=`, `\r`, or `\n`.

## balance

Expand Down
18 changes: 9 additions & 9 deletions data/transactions/logic/assembler.go
Original file line number Diff line number Diff line change
Expand Up @@ -1249,20 +1249,20 @@ func assembleBase64Decode(ops *OpStream, spec *OpSpec, args []string) error {
return ops.errorf("%s expects one argument", spec.Name)
}

alph, ok := base64AlphabetSpecByName[args[0]]
encoding, ok := base64EncodingSpecByName[args[0]]
if !ok {
return ops.errorf("%s unknown alphabet: %#v", spec.Name, args[0])
return ops.errorf("%s unknown encoding: %#v", spec.Name, args[0])
}
if alph.version > ops.Version {
if encoding.version > ops.Version {
//nolint:errcheck // we continue to maintain typestack
ops.errorf("%s %s available in version %d. Missed #pragma version?", spec.Name, args[0], alph.version)
ops.errorf("%s %s available in version %d. Missed #pragma version?", spec.Name, args[0], encoding.version)
}

val := alph.field
val := encoding.field
ops.pending.WriteByte(spec.Opcode)
ops.pending.WriteByte(uint8(val))
ops.trace("%s (%s)", alph.field, alph.ftype)
ops.returns(alph.ftype)
ops.trace("%s (%s)", encoding.field, encoding.ftype)
ops.returns(encoding.ftype)
return nil
}

Expand Down Expand Up @@ -2698,10 +2698,10 @@ func disBase64Decode(dis *disassembleState, spec *OpSpec) (string, error) {
}
dis.nextpc = dis.pc + 2
b64dArg := dis.program[dis.pc+1]
if int(b64dArg) >= len(base64AlphabetNames) {
if int(b64dArg) >= len(base64EncodingNames) {
return "", fmt.Errorf("invalid base64_decode arg index %d at pc=%d", b64dArg, dis.pc)
}
return fmt.Sprintf("%s %s", spec.Name, base64AlphabetNames[b64dArg]), nil
return fmt.Sprintf("%s %s", spec.Name, base64EncodingNames[b64dArg]), nil
}

type disInfo struct {
Expand Down
2 changes: 1 addition & 1 deletion data/transactions/logic/assembler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ itxna Logs 3

const v6Nonsense = v5Nonsense + `
itxn_next
base64_decode URLAlph
base64_decode URLEncoding
`

var nonsense = map[uint64]string{
Expand Down
22 changes: 11 additions & 11 deletions data/transactions/logic/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ var opDocByName = map[string]string{
"extract_uint16": "pop a byte-array A and integer B. Extract a range of bytes from A starting at B up to but not including B+2, convert bytes as big endian and push the uint64 result. If B+2 is larger than the array length, the program fails",
"extract_uint32": "pop a byte-array A and integer B. Extract a range of bytes from A starting at B up to but not including B+4, convert bytes as big endian and push the uint64 result. If B+4 is larger than the array length, the program fails",
"extract_uint64": "pop a byte-array A and integer B. Extract a range of bytes from A starting at B up to but not including B+8, convert bytes as big endian and push the uint64 result. If B+8 is larger than the array length, the program fails",
"base64_decode": "decode X which was base64-encoded using _encoding alphabet_ E. Fail if X is not base64 encoded with alphabet E",
"base64_decode": "decode X which was base64-encoded using _encoding_ E. Fail if X is not base64 encoded with encoding E",

"balance": "get balance for account A, in microalgos. The balance is observed after the effects of previous transactions in the group, and after the fee for the current transaction is deducted.",
"min_balance": "get minimum required balance for account A, in microalgos. Required balance is affected by [ASA](https://developer.algorand.org/docs/features/asa/#assets-overview) and [App](https://developer.algorand.org/docs/features/asc1/stateful/#minimum-balance-requirement-for-a-smart-contract) usage. When creating or opting into an app, the minimum balance grows before the app code runs, therefore the increase is visible there. When deleting or closing out, the minimum balance decreases after the app executes.",
Expand Down Expand Up @@ -231,7 +231,7 @@ var opcodeImmediateNotes = map[string]string{
"ecdsa_pk_decompress": "{uint8 curve index}",
"ecdsa_pk_recover": "{uint8 curve index}",

"base64_decode": "{uint8 alphabet index}",
"base64_decode": "{uint8 encoding index}",
}

// OpImmediateNote returns a short string about immediate data which follows the op byte
Expand Down Expand Up @@ -286,7 +286,7 @@ var opDocExtras = map[string]string{
"itxn_begin": "`itxn_begin` initializes Sender to the application address; Fee to the minimum allowable, taking into account MinTxnFee and credit from overpaying in earlier transactions; FirstValid/LastValid to the values in the top-level transaction, and all other fields to zero values.",
"itxn_field": "`itxn_field` fails if X is of the wrong type for F, including a byte array of the wrong size for use as an address when F is an address field. `itxn_field` also fails if X is an account or asset that does not appear in `txn.Accounts` or `txn.ForeignAssets` of the top-level transaction. (Setting addresses in asset creation are exempted from this requirement.)",
"itxn_submit": "`itxn_submit` resets the current transaction so that it can not be resubmitted. A new `itxn_begin` is required to prepare another inner transaction.",
"base64_decode": "decodes X using the base64 encoding alphabet E. Specify the alphabet with an immediate arg either as URL and Filename Safe (`URLAlph`) or Standard (`StdAlph`). See <a href=\"https://rfc-editor.org/rfc/rfc4648.html#section-4\">RFC 4648</a> (sections 4 and 5)",
"base64_decode": "Decodes X using the base64 encoding E. Specify the encoding with an immediate arg either as URL and Filename Safe (`URLEncoding`) or Standard (`StdEncoding`). See <a href=\"https://rfc-editor.org/rfc/rfc4648.html#section-4\">RFC 4648</a> (sections 4 and 5). It is assumed that the encoding ends with the exact number of `=` padding characters as required by the RFC. When padding occurs, any unused pad bits in the encoding must be set to zero or the decoding will fail. The special cases of `\\n` and `\\r` are allowed but completely ignored. An error will result when attempting to decode a string with a character that is not in the encoding alphabet or not one of `=`, `\\r`, or `\\n`.",
}

// OpDocExtra returns extra documentation text about an op
Expand All @@ -298,14 +298,14 @@ func OpDocExtra(opName string) string {
// here is the order in which opcodes are presented, so place related
// opcodes consecutively, even if their opcode values are not.
var OpGroups = map[string][]string{
"Arithmetic": {"sha256", "keccak256", "sha512_256", "ed25519verify", "ecdsa_verify", "ecdsa_pk_recover", "ecdsa_pk_decompress", "+", "-", "/", "*", "<", ">", "<=", ">=", "&&", "||", "shl", "shr", "sqrt", "bitlen", "exp", "==", "!=", "!", "len", "itob", "btoi", "%", "|", "&", "^", "~", "mulw", "addw", "divmodw", "expw", "getbit", "setbit", "getbyte", "setbyte", "concat"},
"Byte Array Slicing": {"substring", "substring3", "extract", "extract3", "extract_uint16", "extract_uint32", "extract_uint64", "base64_decode"},
"Byte Array Arithmetic": {"b+", "b-", "b/", "b*", "b<", "b>", "b<=", "b>=", "b==", "b!=", "b%"},
"Byte Array Logic": {"b|", "b&", "b^", "b~"},
"Loading Values": {"intcblock", "intc", "intc_0", "intc_1", "intc_2", "intc_3", "pushint", "bytecblock", "bytec", "bytec_0", "bytec_1", "bytec_2", "bytec_3", "pushbytes", "bzero", "arg", "arg_0", "arg_1", "arg_2", "arg_3", "args", "txn", "gtxn", "txna", "txnas", "gtxna", "gtxnas", "gtxns", "gtxnsa", "gtxnsas", "global", "load", "loads", "store", "stores", "gload", "gloads", "gaid", "gaids"},
"Flow Control": {"err", "bnz", "bz", "b", "return", "pop", "dup", "dup2", "dig", "cover", "uncover", "swap", "select", "assert", "callsub", "retsub"},
"State Access": {"balance", "min_balance", "app_opted_in", "app_local_get", "app_local_get_ex", "app_global_get", "app_global_get_ex", "app_local_put", "app_global_put", "app_local_del", "app_global_del", "asset_holding_get", "asset_params_get", "app_params_get", "log"},
"Inner Transactions": {"itxn_begin", "itxn_next", "itxn_field", "itxn_submit", "itxn", "itxna"},
"Arithmetic": {"sha256", "keccak256", "sha512_256", "ed25519verify", "ecdsa_verify", "ecdsa_pk_recover", "ecdsa_pk_decompress", "+", "-", "/", "*", "<", ">", "<=", ">=", "&&", "||", "shl", "shr", "sqrt", "bitlen", "exp", "==", "!=", "!", "len", "itob", "btoi", "%", "|", "&", "^", "~", "mulw", "addw", "divmodw", "expw", "getbit", "setbit", "getbyte", "setbyte", "concat"},
"Byte Array Manipulation": {"substring", "substring3", "extract", "extract3", "extract_uint16", "extract_uint32", "extract_uint64", "base64_decode"},
"Byte Array Arithmetic": {"b+", "b-", "b/", "b*", "b<", "b>", "b<=", "b>=", "b==", "b!=", "b%"},
"Byte Array Logic": {"b|", "b&", "b^", "b~"},
"Loading Values": {"intcblock", "intc", "intc_0", "intc_1", "intc_2", "intc_3", "pushint", "bytecblock", "bytec", "bytec_0", "bytec_1", "bytec_2", "bytec_3", "pushbytes", "bzero", "arg", "arg_0", "arg_1", "arg_2", "arg_3", "args", "txn", "gtxn", "txna", "txnas", "gtxna", "gtxnas", "gtxns", "gtxnsa", "gtxnsas", "global", "load", "loads", "store", "stores", "gload", "gloads", "gaid", "gaids"},
"Flow Control": {"err", "bnz", "bz", "b", "return", "pop", "dup", "dup2", "dig", "cover", "uncover", "swap", "select", "assert", "callsub", "retsub"},
"State Access": {"balance", "min_balance", "app_opted_in", "app_local_get", "app_local_get_ex", "app_global_get", "app_global_get_ex", "app_local_put", "app_global_put", "app_local_del", "app_global_del", "asset_holding_get", "asset_params_get", "app_params_get", "log"},
"Inner Transactions": {"itxn_begin", "itxn_next", "itxn_field", "itxn_submit", "itxn", "itxna"},
}

// OpCost indicates the cost of an operation over the range of
Expand Down
8 changes: 4 additions & 4 deletions data/transactions/logic/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -4047,15 +4047,15 @@ func base64Decode(encoded []byte, encoding *base64.Encoding) ([]byte, error) {

func opBase64Decode(cx *EvalContext) {
last := len(cx.stack) - 1
alphabetField := Base64Alphabet(cx.program[cx.pc+1])
fs, ok := base64AlphabetSpecByField[alphabetField]
encodingField := Base64Encoding(cx.program[cx.pc+1])
fs, ok := base64EncodingSpecByField[encodingField]
if !ok || fs.version > cx.version {
cx.err = fmt.Errorf("invalid base64_decode alphabet %d", alphabetField)
cx.err = fmt.Errorf("invalid base64_decode encoding %d", encodingField)
return
}

encoding := base64.URLEncoding
if alphabetField == StdAlph {
if encodingField == StdEncoding {
encoding = base64.StdEncoding
}
cx.stack[last].Bytes, cx.err = base64Decode(cx.stack[last].Bytes, encoding)
Expand Down
2 changes: 1 addition & 1 deletion data/transactions/logic/evalStateful_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2392,7 +2392,7 @@ func TestReturnTypes(t *testing.T) {
"itxn": "itxn_begin; int pay; itxn_field TypeEnum; itxn_submit; itxn CreatedAssetID",
// This next one is a cop out. Can't use itxna Logs until we have inner appl
"itxna": "itxn_begin; int pay; itxn_field TypeEnum; itxn_submit; itxn NumLogs",
"base64_decode": `pushbytes "YWJjMTIzIT8kKiYoKSctPUB+"; base64_decode StdAlph; pushbytes "abc123!?$*&()'-=@~"; ==; pushbytes "YWJjMTIzIT8kKiYoKSctPUB-"; base64_decode URLAlph; pushbytes "abc123!?$*&()'-=@~"; ==; &&; assert`,
"base64_decode": `pushbytes "YWJjMTIzIT8kKiYoKSctPUB+"; base64_decode StdEncoding; pushbytes "abc123!?$*&()'-=@~"; ==; pushbytes "YWJjMTIzIT8kKiYoKSctPUB-"; base64_decode URLEncoding; pushbytes "abc123!?$*&()'-=@~"; ==; &&; assert`,
}

// these require special input data and tested separately
Expand Down
78 changes: 63 additions & 15 deletions data/transactions/logic/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3738,15 +3738,15 @@ func BenchmarkBase64Decode(b *testing.B) {
"keccak256",
"sha256",
"sha512_256",
"base64_decode StdAlph",
"base64_decode URLAlph",
"base64_decode StdEncoding",
"base64_decode URLEncoding",
}
benches := [][]string{}
for i, tag := range tags {
for _, op := range ops {
testName := op
encoded := stds[i]
if op == "base64_decode URLAlph" {
if op == "base64_decode URLEncoding" {
encoded = urls[i]
}
if len(op) > 0 {
Expand Down Expand Up @@ -4961,14 +4961,16 @@ func TestPcDetails(t *testing.T) {
var minB64DecodeVersion uint64 = 6

type b64DecodeTestCase struct {
Encoded string
IsURL bool
Decoded string
Error error
Encoded string
IsURL bool
HasExtraNLs bool
Decoded string
Error error
}

var testCases = []b64DecodeTestCase{
{"TU9CWS1ESUNLOwoKb3IsIFRIRSBXSEFMRS4KCgpCeSBIZXJtYW4gTWVsdmlsbGU=",
false,
false,
`MOBY-DICK;
Expand All @@ -4980,6 +4982,7 @@ By Herman Melville`,
},
{"TU9CWS1ESUNLOwoKb3IsIFRIRSBXSEFMRS4KCgpCeSBIZXJtYW4gTWVsdmlsbGU=",
true,
false,
`MOBY-DICK;
or, THE WHALE.
Expand All @@ -4988,10 +4991,51 @@ or, THE WHALE.
By Herman Melville`,
nil,
},
{"YWJjMTIzIT8kKiYoKSctPUB+", false, "abc123!?$*&()'-=@~", nil},
{"YWJjMTIzIT8kKiYoKSctPUB-", true, "abc123!?$*&()'-=@~", nil},
{"YWJjMTIzIT8kKiYoKSctPUB+", true, "", base64.CorruptInputError(23)},
{"YWJjMTIzIT8kKiYoKSctPUB-", false, "", base64.CorruptInputError(23)},
{"YWJjMTIzIT8kKiYoKSctPUB+", false, false, "abc123!?$*&()'-=@~", nil},
{"YWJjMTIzIT8kKiYoKSctPUB-", true, false, "abc123!?$*&()'-=@~", nil},
{"YWJjMTIzIT8kKiYoKSctPUB+", true, false, "", base64.CorruptInputError(23)},
{"YWJjMTIzIT8kKiYoKSctPUB-", false, false, "", base64.CorruptInputError(23)},

// try extra ='s and various whitespace:
{"", false, false, "", nil},
{"", true, false, "", nil},
{"=", false, true, "", base64.CorruptInputError(0)},
{"=", true, true, "", base64.CorruptInputError(0)},
{" ", false, true, "", base64.CorruptInputError(0)},
{" ", true, true, "", base64.CorruptInputError(0)},
{"\t", false, true, "", base64.CorruptInputError(0)},
{"\t", true, true, "", base64.CorruptInputError(0)},
{"\r", false, true, "", nil},
{"\r", true, true, "", nil},
{"\n", false, true, "", nil},
{"\n", true, true, "", nil},

{"YWJjMTIzIT8kKiYoKSctPUB+\n", false, true, "abc123!?$*&()'-=@~", nil},
{"YWJjMTIzIT8kKiYoKSctPUB-\n", true, true, "abc123!?$*&()'-=@~", nil},
{"YWJjMTIzIT8kK\riYoKSctPUB+\n", false, true, "abc123!?$*&()'-=@~", nil},
{"YWJjMTIzIT8kK\riYoKSctPUB-\n", true, true, "abc123!?$*&()'-=@~", nil},
{"\n\rYWJjMTIzIT8\rkKiYoKSctPUB+\n", false, true, "abc123!?$*&()'-=@~", nil},
{"\n\rYWJjMTIzIT8\rkKiYoKSctPUB-\n", true, true, "abc123!?$*&()'-=@~", nil},

// padding and extra legal whitespace
{"SQ==", false, false, "I", nil},
{"SQ==", true, false, "I", nil},
{"\rS\r\nQ=\n=\r\r\n", false, true, "I", nil},
{"\rS\r\nQ=\n=\r\r\n", true, true, "I", nil},

// Is padding necessary? Yes, it is, and it must appear in exactly the expected place and amount.
{"SQ==", false, false, "I", nil},
{"SQ==", true, false, "I", nil},
{"S=Q=", false, false, "", base64.CorruptInputError(1)},
{"S=Q=", true, false, "", base64.CorruptInputError(1)},
{"=SQ=", false, false, "", base64.CorruptInputError(0)},
{"=SQ=", true, false, "", base64.CorruptInputError(0)},
{"SQ", false, false, "", base64.CorruptInputError(0)},
{"SQ", true, false, "", base64.CorruptInputError(0)},
{"SQ=", false, false, "", base64.CorruptInputError(3)},
{"SQ=", true, false, "", base64.CorruptInputError(3)},
{"SQ===", false, false, "", base64.CorruptInputError(4)},
{"SQ===", true, false, "", base64.CorruptInputError(4)},
}

func TestBase64DecodeFunc(t *testing.T) {
Expand All @@ -5003,10 +5047,14 @@ func TestBase64DecodeFunc(t *testing.T) {
if testCase.IsURL {
encoding = base64.URLEncoding
}
encoding = encoding.Strict()
// sanity check:
if testCase.Error == nil && !testCase.HasExtraNLs {
require.Equal(t, testCase.Encoded, encoding.EncodeToString([]byte(testCase.Decoded)))
}

decoded, err := base64Decode([]byte(testCase.Encoded), encoding)
require.Equal(t, testCase.Error, err, fmt.Sprintf("Error (%s): case decode [%s] -> [%s]", err, testCase.Encoded, testCase.Decoded))
require.Equal(t, []byte(testCase.Decoded), decoded)
require.Equal(t, testCase.Error, err)
}
}

Expand All @@ -5026,9 +5074,9 @@ base64_decode %s
args := []b64DecodeTestArgs{}
for _, testCase := range testCases {
if testCase.Error == nil {
field := "StdAlph"
field := "StdEncoding"
if testCase.IsURL {
field = "URLAlph"
field = "URLEncoding"
}
source := fmt.Sprintf(sourceTmpl, minB64DecodeVersion, field)
ops, err := AssembleStringWithVersion(source, minB64DecodeVersion)
Expand Down
Loading

0 comments on commit 9a5d5cd

Please sign in to comment.