diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 4aac843c..95c39fad 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -15,9 +15,8 @@ jobs:
       uses: actions/setup-python@v2
       with:
         python-version: 3.8
-    - name: Install PyYAML
+    - name: Install Coverage
       run: |
-        pip3 install -r requirements.txt
         pip3 install coverage
     - name: Run pre-commit
       run: |
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a30716ac..0eb0c898 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -15,9 +15,8 @@ jobs:
       uses: actions/setup-python@v2
       with:
        python-version: 3.8
-    - name: Install PyYAML
+    - name: Install Coverage
       run: |
-        pip3 install -r requirements.txt
        pip3 install coverage
    - name: Test error outputs
      run: coverage run -m unittest -b
diff --git a/.gitignore b/.gitignore
index 4ddba6b5..13806158 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,6 @@
 priv-instr-table.tex
 inst.rs
 inst.spinalhdl
 inst.sverilog
-instr_dict.yaml
+instr_dict.json
 __pycache__/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cca4b3b9..bd41c169 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,13 +26,8 @@ repos:
   #   rev: v3.3.1
   #   hooks:
   #   - id: pylint
-  #     additional_dependencies:
-  #     - "pyyaml==6.0.2"
-  # TODO: Enable this when types are added.
-  # - repo: https://github.com/RobertCraigie/pyright-python
-  #   rev: v1.1.383
-  #   hooks:
-  #   - id: pyright
-  #     additional_dependencies:
-  #     - "pyyaml==6.0.2"
+  - repo: https://github.com/RobertCraigie/pyright-python
+    rev: v1.1.383
+    hooks:
+    - id: pyright
diff --git a/README.md b/README.md
index e6702ed6..e7a988cc 100644
--- a/README.md
+++ b/README.md
@@ -126,9 +126,11 @@
 of extensions are being processed such that the *base-instruction* is not included

 The following artifacts can be generated using parse.py:

-- instr\_dict.yaml : This is file generated always by parse.py and contains the
-  entire main dictionary `instr\_dict` in YAML format. Note, in this yaml the
-  *dots* in an instruction are replaced with *underscores*
+- instr\_dict.json : This is always generated by parse.py and contains the
+  entire main dictionary `instr\_dict` in JSON format. Note, in this file the
+  *dots* in an instruction are replaced with *underscores*. In previous
+  versions of this project the generated file was instr\_dict.yaml. Note that
+  JSON is a subset of YAML so the file can still be read by any YAML parser.
 - encoding.out.h : this is the header file that is used by tools like spike, pk, etc
 - instr-table.tex : the latex table of instructions used in the riscv-unpriv spec
 - priv-instr-table.tex : the latex table of instruction used in the riscv-priv spec
@@ -138,14 +140,6 @@ The following artifacts can be generated using parse.py:
 - inst.spinalhdl : spinalhdl code to decode instructions
 - inst.go : go code to decode instructions

-Make sure you install the required python pre-requisites are installed by executing the following
-command:
-
-```
-sudo apt-get install python-pip3
-pip3 install -r requirements.txt
-```
-
 To generate all the above artifacts for all instructions currently checked in, simply run `make`
 from the root-directory. This should print the following log on the command-line:
 ```
@@ -220,6 +214,6 @@ DEBUG:: Processing line: bne bimm12hi rs1 rs2 bimm12lo 14..12=1 6..2=0x

 ## How do I find where an instruction is defined?
 You can use `grep "^\s*<instr_name>" rv* unratified/rv*` OR run `make` and open
-`instr_dict.yaml` and search of the instruction you are looking for. Within that
-instruction the `extension` field will indicate which file the instruction was
-picked from.
+`instr_dict.json` and search for the instruction you are looking for. Within
+that instruction the `extension` field will indicate which file the
+instruction was picked from.
diff --git a/c_utils.py b/c_utils.py
index 40fa4cb4..0ebc6c2d 100644
--- a/c_utils.py
+++ b/c_utils.py
@@ -8,7 +8,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_c(instr_dict):
+def make_c(instr_dict: InstrDict):
     mask_match_str = ""
     declare_insn_str = ""
     for i in instr_dict:
diff --git a/chisel_utils.py b/chisel_utils.py
index 0943584d..9916b76c 100644
--- a/chisel_utils.py
+++ b/chisel_utils.py
@@ -10,7 +10,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_chisel(instr_dict, spinal_hdl=False):
+def make_chisel(instr_dict: InstrDict, spinal_hdl: bool = False):

     chisel_names = ""
     cause_names_str = ""
@@ -31,7 +31,7 @@ def make_chisel(instr_dict, spinal_hdl=False):
             elif "rv_" in e:
                 e_format = e.replace("rv_", "").upper()
             else:
-                e_format = e.upper
+                e_format = e.upper()
             chisel_names += f'  val {e_format+"Type"} = Map(\n'
             for instr in e_instrs:
                 tmp_instr_name = '"' + instr.upper().replace(".", "_") + '"'
diff --git a/constants.py b/constants.py
index cff9c043..4b608e35 100644
--- a/constants.py
+++ b/constants.py
@@ -1,6 +1,7 @@
 import csv
 import re

+# TODO: The constants in this file should be in all caps.
 overlapping_extensions = {
     "rv_zcmt": {"rv_c_d"},
     "rv_zcmp": {"rv_c_d"},
@@ -21,29 +22,29 @@

 # regex to find <msb>..<lsb>=<val> patterns in instruction
 fixed_ranges = re.compile(
-    "\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M
+    r"\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M
 )

 # regex to find <lsb>=<val> patterns in instructions
 # single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M)
-single_fixed = re.compile("(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M)
+single_fixed = re.compile(r"(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M)

 # regex to find the overloading condition variable
-var_regex = re.compile("(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M)
+var_regex = re.compile(r"(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M)

 # regex for pseudo op instructions returns the dependent filename, dependent
 # instruction, the pseudo op name and the encoding string
 pseudo_regex = re.compile(
-    "^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<pseudo_encoding>.*)$",
+    r"^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<pseudo_encoding>.*)$",
     re.M,
 )

 imported_regex = re.compile(
-    "^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M
+    r"^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M
 )


-def read_csv(filename):
+def read_csv(filename: str):
     """
     Reads a CSV file and returns a list of tuples.
     Each tuple contains an integer value (from the first column)
     and a string (from the second column).
@@ -79,126 +80,99 @@ def read_csv(filename):

 # dictionary containing the mapping of the argument to the what the fields in
 # the latex table should be
-latex_mapping = {}
-latex_mapping["imm12"] = "imm[11:0]"
-latex_mapping["rs1"] = "rs1"
-latex_mapping["rs2"] = "rs2"
-latex_mapping["rd"] = "rd"
-latex_mapping["imm20"] = "imm[31:12]"
-latex_mapping["bimm12hi"] = "imm[12$\\vert$10:5]"
-latex_mapping["bimm12lo"] = "imm[4:1$\\vert$11]"
-latex_mapping["imm12hi"] = "imm[11:5]"
-latex_mapping["imm12lo"] = "imm[4:0]"
-latex_mapping["jimm20"] = "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]"
-latex_mapping["zimm"] = "uimm"
-latex_mapping["shamtw"] = "shamt"
-latex_mapping["shamtd"] = "shamt"
-latex_mapping["shamtq"] = "shamt"
-latex_mapping["rd_p"] = "rd\\,$'$"
-latex_mapping["rs1_p"] = "rs1\\,$'$"
-latex_mapping["rs2_p"] = "rs2\\,$'$"
-latex_mapping["rd_rs1_n0"] = "rd/rs$\\neq$0"
-latex_mapping["rd_rs1_p"] = "rs1\\,$'$/rs2\\,$'$"
-latex_mapping["c_rs2"] = "rs2"
-latex_mapping["c_rs2_n0"] = "rs2$\\neq$0"
-latex_mapping["rd_n0"] = "rd$\\neq$0"
-latex_mapping["rs1_n0"] = "rs1$\\neq$0"
-latex_mapping["c_rs1_n0"] = "rs1$\\neq$0"
-latex_mapping["rd_rs1"] = "rd/rs1"
-latex_mapping["zimm6hi"] = "uimm[5]"
-latex_mapping["zimm6lo"] = "uimm[4:0]"
-latex_mapping["c_nzuimm10"] = "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]"
-latex_mapping["c_uimm7lo"] = "uimm[2$\\vert$6]"
-latex_mapping["c_uimm7hi"] = "uimm[5:3]"
-latex_mapping["c_uimm8lo"] = "uimm[7:6]"
-latex_mapping["c_uimm8hi"] = "uimm[5:3]"
-latex_mapping["c_uimm9lo"] = "uimm[7:6]"
-latex_mapping["c_uimm9hi"] = "uimm[5:4$\\vert$8]"
-latex_mapping["c_nzimm6lo"] = "nzimm[4:0]"
-latex_mapping["c_nzimm6hi"] = "nzimm[5]"
-latex_mapping["c_imm6lo"] = "imm[4:0]"
-latex_mapping["c_imm6hi"] = "imm[5]"
-latex_mapping["c_nzimm10hi"] = "nzimm[9]"
-latex_mapping["c_nzimm10lo"] = "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]"
-latex_mapping["c_nzimm18hi"] = "nzimm[17]"
-latex_mapping["c_nzimm18lo"] = "nzimm[16:12]"
-latex_mapping["c_imm12"] = (
-    "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]"
-)
-latex_mapping["c_bimm9lo"] = "imm[7:6$\\vert$2:1$\\vert$5]"
-latex_mapping["c_bimm9hi"] = "imm[8$\\vert$4:3]"
-latex_mapping["c_nzuimm5"] = "nzuimm[4:0]"
-latex_mapping["c_nzuimm6lo"] = "nzuimm[4:0]"
-latex_mapping["c_nzuimm6hi"] = "nzuimm[5]"
-latex_mapping["c_uimm8splo"] = "uimm[4:2$\\vert$7:6]"
-latex_mapping["c_uimm8sphi"] = "uimm[5]"
-latex_mapping["c_uimm8sp_s"] = "uimm[5:2$\\vert$7:6]"
-latex_mapping["c_uimm10splo"] = "uimm[4$\\vert$9:6]"
-latex_mapping["c_uimm10sphi"] = "uimm[5]"
-latex_mapping["c_uimm9splo"] = "uimm[4:3$\\vert$8:6]"
-latex_mapping["c_uimm9sphi"] = "uimm[5]"
-latex_mapping["c_uimm10sp_s"] = "uimm[5:4$\\vert$9:6]"
-latex_mapping["c_uimm9sp_s"] = "uimm[5:3$\\vert$8:6]"
+latex_mapping = {
+    "imm12": "imm[11:0]",
+    "rs1": "rs1",
+    "rs2": "rs2",
+    "rd": "rd",
+    "imm20": "imm[31:12]",
+    "bimm12hi": "imm[12$\\vert$10:5]",
+    "bimm12lo": "imm[4:1$\\vert$11]",
+    "imm12hi": "imm[11:5]",
+    "imm12lo": "imm[4:0]",
+    "jimm20": "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]",
+    "zimm": "uimm",
+    "shamtw": "shamt",
+    "shamtd": "shamt",
+    "shamtq": "shamt",
+    "rd_p": "rd\\,$'$",
+    "rs1_p": "rs1\\,$'$",
+    "rs2_p": "rs2\\,$'$",
+    "rd_rs1_n0": "rd/rs$\\neq$0",
+    "rd_rs1_p": "rs1\\,$'$/rs2\\,$'$",
+    "c_rs2": "rs2",
+    "c_rs2_n0": "rs2$\\neq$0",
+    "rd_n0": "rd$\\neq$0",
+    "rs1_n0": "rs1$\\neq$0",
+    "c_rs1_n0": "rs1$\\neq$0",
+    "rd_rs1": "rd/rs1",
+    "zimm6hi": "uimm[5]",
+    "zimm6lo": "uimm[4:0]",
+    "c_nzuimm10": "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]",
+    "c_uimm7lo": "uimm[2$\\vert$6]",
+    "c_uimm7hi": "uimm[5:3]",
+    "c_uimm8lo": "uimm[7:6]",
+    "c_uimm8hi": "uimm[5:3]",
+    "c_uimm9lo": "uimm[7:6]",
+    "c_uimm9hi": "uimm[5:4$\\vert$8]",
+    "c_nzimm6lo": "nzimm[4:0]",
+    "c_nzimm6hi": "nzimm[5]",
+    "c_imm6lo": "imm[4:0]",
+    "c_imm6hi": "imm[5]",
+    "c_nzimm10hi": "nzimm[9]",
+    "c_nzimm10lo": "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]",
+    "c_nzimm18hi": "nzimm[17]",
+    "c_nzimm18lo": "nzimm[16:12]",
+    "c_imm12": "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]",
+    "c_bimm9lo": "imm[7:6$\\vert$2:1$\\vert$5]",
+    "c_bimm9hi": "imm[8$\\vert$4:3]",
+    "c_nzuimm5": "nzuimm[4:0]",
+    "c_nzuimm6lo": "nzuimm[4:0]",
+    "c_nzuimm6hi": "nzuimm[5]",
+    "c_uimm8splo": "uimm[4:2$\\vert$7:6]",
+    "c_uimm8sphi": "uimm[5]",
+    "c_uimm8sp_s": "uimm[5:2$\\vert$7:6]",
+    "c_uimm10splo": "uimm[4$\\vert$9:6]",
+    "c_uimm10sphi": "uimm[5]",
+    "c_uimm9splo": "uimm[4:3$\\vert$8:6]",
+    "c_uimm9sphi": "uimm[5]",
+    "c_uimm10sp_s": "uimm[5:4$\\vert$9:6]",
+    "c_uimm9sp_s": "uimm[5:3$\\vert$8:6]",
+}

 # created a dummy instruction-dictionary like dictionary for all the instruction
 # types so that the same logic can be used to create their tables
-latex_inst_type = {}
-latex_inst_type["R-type"] = {}
-latex_inst_type["R-type"]["variable_fields"] = [
-    "opcode",
-    "rd",
-    "funct3",
-    "rs1",
-    "rs2",
-    "funct7",
-]
-latex_inst_type["R4-type"] = {}
-latex_inst_type["R4-type"]["variable_fields"] = [
-    "opcode",
-    "rd",
-    "funct3",
-    "rs1",
-    "rs2",
-    "funct2",
-    "rs3",
-]
-latex_inst_type["I-type"] = {}
-latex_inst_type["I-type"]["variable_fields"] = [
-    "opcode",
-    "rd",
-    "funct3",
-    "rs1",
-    "imm12",
-]
-latex_inst_type["S-type"] = {}
-latex_inst_type["S-type"]["variable_fields"] = [
-    "opcode",
-    "imm12lo",
-    "funct3",
-    "rs1",
-    "rs2",
-    "imm12hi",
-]
-latex_inst_type["B-type"] = {}
-latex_inst_type["B-type"]["variable_fields"] = [
-    "opcode",
-    "bimm12lo",
-    "funct3",
-    "rs1",
-    "rs2",
-    "bimm12hi",
+latex_inst_type = {
+    "R-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct7"],
+    },
+    "R4-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct2", "rs3"],
+    },
+    "I-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "imm12"],
+    },
+    "S-type": {
+        "variable_fields": ["opcode", "imm12lo", "funct3", "rs1", "rs2", "imm12hi"],
+    },
+    "B-type": {
+        "variable_fields": ["opcode", "bimm12lo", "funct3", "rs1", "rs2", "bimm12hi"],
+    },
+    "U-type": {
+        "variable_fields": ["opcode", "rd", "imm20"],
+    },
+    "J-type": {
+        "variable_fields": ["opcode", "rd", "jimm20"],
+    },
+}
+latex_fixed_fields = [
+    (31, 25),
+    (24, 20),
+    (19, 15),
+    (14, 12),
+    (11, 7),
+    (6, 0),
 ]
-latex_inst_type["U-type"] = {}
-latex_inst_type["U-type"]["variable_fields"] = ["opcode", "rd", "imm20"]
-latex_inst_type["J-type"] = {}
-latex_inst_type["J-type"]["variable_fields"] = ["opcode", "rd", "jimm20"]
-latex_fixed_fields = []
-latex_fixed_fields.append((31, 25))
-latex_fixed_fields.append((24, 20))
-latex_fixed_fields.append((19, 15))
-latex_fixed_fields.append((14, 12))
-latex_fixed_fields.append((11, 7))
-latex_fixed_fields.append((6, 0))

 # Pseudo-ops present in the generated encodings.
 # By default pseudo-ops are not listed as they are considered aliases
diff --git a/go_utils.py b/go_utils.py
index 9815e702..88cb34b0 100644
--- a/go_utils.py
+++ b/go_utils.py
@@ -8,7 +8,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_go(instr_dict):
+def make_go(instr_dict: InstrDict):

     args = " ".join(sys.argv)
     prelude = f"""// Code generated by {args}; DO NOT EDIT."""
diff --git a/latex_utils.py b/latex_utils.py
index ab5f6f92..f1bf5321 100644
--- a/latex_utils.py
+++ b/latex_utils.py
@@ -1,13 +1,6 @@
-import collections
-import copy
-import glob
 import logging
-import os
 import pprint
-import re
-import sys
-
-import yaml
+from typing import TextIO

 from constants import *
 from shared_utils import *
@@ -119,7 +112,9 @@ def make_latex_table():
     # instructions listed in list_of_instructions will be dumped into latex.
     caption = ""
     type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"]
-    dataset_list = [(["_i", "32_i"], "RV32I Base Instruction Set", [], False)]
+    dataset_list: list[tuple[list[str], str, list[str], bool]] = [
+        (["_i", "32_i"], "RV32I Base Instruction Set", [], False)
+    ]
     dataset_list.append((["_i"], "", ["fence_tso", "pause"], True))
     make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
@@ -186,7 +181,13 @@ def make_latex_table():
     latex_file.close()


-def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
+def make_ext_latex_table(
+    type_list: "list[str]",
+    dataset: "list[tuple[list[str], str, list[str], bool]]",
+    latex_file: TextIO,
+    ilen: int,
+    caption: str,
+):
     """
     For a given collection of extensions this function dumps out a complete
     latex table which includes the encodings of the instructions.
@@ -287,7 +288,7 @@ def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
     # iterate ovr each instruction type and create a table entry
     for t in type_dict:
-        fields = []
+        fields: list[tuple[int, int, str]] = []

         # first capture all "arguments" of the type (funct3, funct7, rd, etc)
         # and capture their positions using arg_lut.
@@ -334,7 +335,7 @@ def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
     # for each entry in the dataset create a table
     content = ""
     for ext_list, title, filter_list, include_pseudo in dataset:
-        instr_dict = {}
+        instr_dict: InstrDict = {}

         # for all extensions list in ext_list, create a dictionary of
         # instructions associated with those extensions.
diff --git a/parse.py b/parse.py
index 9677ed6d..4339e81d 100755
--- a/parse.py
+++ b/parse.py
@@ -1,19 +1,18 @@
 #!/usr/bin/env python3
 import collections
+import json
 import logging
 import pprint
 import sys

-import yaml
-
-from c_utils import *
-from chisel_utils import *
-from constants import *
-from go_utils import *
-from latex_utils import *
-from rust_utils import *
-from shared_utils import *
-from sverilog_utils import *
+from c_utils import make_c
+from chisel_utils import make_chisel
+from constants import emitted_pseudo_ops
+from go_utils import make_go
+from latex_utils import make_latex_table, make_priv_latex_table
+from rust_utils import make_rust
+from shared_utils import add_segmented_vls_insn, create_inst_dict
+from sverilog_utils import make_sverilog

 LOG_FORMAT = "%(levelname)s:: %(message)s"
 LOG_LEVEL = logging.INFO
@@ -21,7 +20,8 @@
 pretty_printer = pprint.PrettyPrinter(indent=2)
 logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)

-if __name__ == "__main__":
+
+def main():
     print(f"Running with args : {sys.argv}")

     extensions = sys.argv[1:]
@@ -44,8 +44,8 @@
     instr_dict = create_inst_dict(extensions, include_pseudo)

-    with open("instr_dict.yaml", "w") as outfile:
-        yaml.dump(add_segmented_vls_insn(instr_dict), outfile, default_flow_style=False)
+    with open("instr_dict.json", "w") as outfile:
+        json.dump(add_segmented_vls_insn(instr_dict), outfile, indent=2)

     instr_dict = collections.OrderedDict(sorted(instr_dict.items()))

     if "-c" in sys.argv[1:]:
@@ -81,3 +81,7 @@
     logging.info("instr-table.tex generated successfully")
     make_priv_latex_table()
     logging.info("priv-instr-table.tex generated successfully")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyrightconfig.json b/pyrightconfig.json
new file mode 100644
index 00000000..67b99de7
--- /dev/null
+++ b/pyrightconfig.json
@@ -0,0 +1,4 @@
+{
+    "typeCheckingMode": "strict",
+    "pythonVersion": "3.6"
+}
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index c3726e8b..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyyaml
diff --git a/rust_utils.py b/rust_utils.py
index 68e0c8cd..ad160e8a 100644
--- a/rust_utils.py
+++ b/rust_utils.py
@@ -10,7 +10,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_rust(instr_dict):
+def make_rust(instr_dict: InstrDict):
     mask_match_str = ""
     for i in instr_dict:
         mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
diff --git a/sverilog_utils.py b/sverilog_utils.py
index ff116ccb..a3b75712 100644
--- a/sverilog_utils.py
+++ b/sverilog_utils.py
@@ -7,7 +7,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_sverilog(instr_dict):
+def make_sverilog(instr_dict: InstrDict):
     names_str = ""
     for i in instr_dict:
         names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
diff --git a/test.py b/test.py
index aa091b72..34482b1b 100644
--- a/test.py
+++ b/test.py
@@ -1,239 +1,608 @@
 #!/usr/bin/env python3
-
+import copy
+import glob
 import logging
-import unittest
-from unittest.mock import Mock, patch
-
-from shared_utils import *
-
-
-class EncodingUtilsTest(unittest.TestCase):
-    """Tests for basic encoding utilities"""
+import os
+import pprint
+import re
+from itertools import chain
+from typing import Dict, TypedDict
+
+from constants import *
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+# Log an error message and exit
+def log_and_exit(message: str):
+    """Log an error message and exit the program."""
+    logging.error(message)
+    raise SystemExit(1)
+
+
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-
+# Initialize encoding to 32-bit '-' values
+def initialize_encoding(bits: int = 32) -> "list[str]":
+    """Initialize encoding with '-' to represent don't care bits."""
+    return ["-"] * bits
+
+
-    def test_initialize_encoding(self):
-        """Test encoding initialization with different bit lengths"""
-        self.assertEqual(initialize_encoding(32), ["-"] * 32)
-        self.assertEqual(initialize_encoding(16), ["-"] * 16)
-        self.assertEqual(initialize_encoding(), ["-"] * 32)  # default case
-
-    def test_validate_bit_range(self):
-        """Test bit range validation"""
-        # Valid cases
-        validate_bit_range(7, 3, 15, "test_instr")  # 15 fits in 5 bits
-        validate_bit_range(31, 0, 0xFFFFFFFF, "test_instr")  # max 32-bit value
-
-        # Invalid cases
-        with self.assertRaises(SystemExit):
-            validate_bit_range(3, 7, 1, "test_instr")  # msb < lsb
-        with self.assertRaises(SystemExit):
-            validate_bit_range(3, 0, 16, "test_instr")  # value too large for range
+# Validate bit range and value
+def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str):
+    """Validate the bit range and entry value."""
+    if msb < lsb:
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding'
+        )
+
+    if entry_value >= (1 << (msb - lsb + 1)):
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb + 1}'
+        )
+
-
-    def test_parse_instruction_line(self):
-        """Test instruction line parsing"""
-        name, remaining = parse_instruction_line("add.w r1, r2, r3")
-        self.assertEqual(name, "add_w")
-        self.assertEqual(remaining, "r1, r2, r3")
-
-        name, remaining = parse_instruction_line("lui rd imm20 6..2=0x0D")
-        self.assertEqual(name, "lui")
-        self.assertEqual(remaining, "rd imm20 6..2=0x0D")
-
-
-class BitManipulationTest(unittest.TestCase):
-    """Tests for bit manipulation and checking functions"""
-
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-        self.test_encoding = initialize_encoding()
-
-    def test_check_overlapping_bits(self):
-        """Test overlapping bits detection"""
-        # Valid case - no overlap
-        self.test_encoding[31 - 5] = "-"
-        check_overlapping_bits(self.test_encoding, 5, "test_instr")
-
-        # Invalid case - overlap
-        self.test_encoding[31 - 5] = "1"
-        with self.assertRaises(SystemExit):
-            check_overlapping_bits(self.test_encoding, 5, "test_instr")
-
-    def test_update_encoding_for_fixed_range(self):
-        """Test encoding updates for fixed ranges"""
-        encoding = initialize_encoding()
-        update_encoding_for_fixed_range(encoding, 6, 2, 0x0D, "test_instr")
-
-        # Check specific bits are set correctly
-        self.assertEqual(encoding[31 - 6 : 31 - 1], ["0", "1", "1", "0", "1"])
-
-    def test_process_fixed_ranges(self):
-        """Test processing of fixed bit ranges"""
-        encoding = initialize_encoding()
-        remaining = "rd imm20 6..2=0x0D 1..0=3"
-
-        result = process_fixed_ranges(remaining, encoding, "test_instr")
-        self.assertNotIn("6..2=0x0D", result)
-        self.assertNotIn("1..0=3", result)
-
-
-class EncodingArgsTest(unittest.TestCase):
-    """Tests for encoding arguments handling"""
-
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-
-    @patch.dict("shared_utils.arg_lut", {"rd": (11, 7), "rs1": (19, 15)})
-    def test_check_arg_lut(self):
-        """Test argument lookup table checking"""
-        encoding_args = initialize_encoding()
-        args = ["rd", "rs1"]
-        check_arg_lut(args, encoding_args, "test_instr")
-
-        # Verify encoding_args has been updated correctly
-        self.assertEqual(encoding_args[31 - 11 : 31 - 6], ["rd"] * 5)
-        self.assertEqual(encoding_args[31 - 19 : 31 - 14], ["rs1"] * 5)
-
-    @patch.dict("shared_utils.arg_lut", {"rs1": (19, 15)})
-    def test_handle_arg_lut_mapping(self):
-        """Test handling of argument mappings"""
-        # Valid mapping
-        result = handle_arg_lut_mapping("rs1=new_arg", "test_instr")
-        self.assertEqual(result, "rs1=new_arg")
-
-        # Invalid mapping
-        with self.assertRaises(SystemExit):
-            handle_arg_lut_mapping("invalid_arg=new_arg", "test_instr")
-
-
-class ISAHandlingTest(unittest.TestCase):
-    """Tests for ISA type handling and validation"""
-
-    def test_extract_isa_type(self):
-        """Test ISA type extraction"""
-        self.assertEqual(extract_isa_type("rv32_i"), "rv32")
-        self.assertEqual(extract_isa_type("rv64_m"), "rv64")
-        self.assertEqual(extract_isa_type("rv_c"), "rv")
-
-    def test_is_rv_variant(self):
-        """Test RV variant checking"""
-        self.assertTrue(is_rv_variant("rv32", "rv"))
-        self.assertTrue(is_rv_variant("rv", "rv64"))
-        self.assertFalse(is_rv_variant("rv32", "rv64"))
-
-    def test_same_base_isa(self):
-        """Test base ISA comparison"""
-        self.assertTrue(same_base_isa("rv32_i", ["rv32_m", "rv32_a"]))
-        self.assertTrue(same_base_isa("rv_i", ["rv32_i", "rv64_i"]))
-        self.assertFalse(same_base_isa("rv32_i", ["rv64_m"]))
-
-
-class StringManipulationTest(unittest.TestCase):
-    """Tests for string manipulation utilities"""
-
-    def test_pad_to_equal_length(self):
-        """Test string padding"""
-        str1, str2 = pad_to_equal_length("101", "1101")
-        self.assertEqual(len(str1), len(str2))
-        self.assertEqual(str1, "-101")
-        self.assertEqual(str2, "1101")
-
-    def test_overlaps(self):
-        """Test string overlap checking"""
-        self.assertTrue(overlaps("1-1", "101"))
-        self.assertTrue(overlaps("---", "101"))
-        self.assertFalse(overlaps("111", "101"))
-
-
-class InstructionProcessingTest(unittest.TestCase):
-    """Tests for instruction processing and validation"""
-
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-        # Create a patch for arg_lut
-        self.arg_lut_patcher = patch.dict(
-            "shared_utils.arg_lut", {"rd": (11, 7), "imm20": (31, 12)}
-        )
-        self.arg_lut_patcher.start()
-
-    def tearDown(self):
-        self.arg_lut_patcher.stop()
-
-    @patch("shared_utils.fixed_ranges")
-    @patch("shared_utils.single_fixed")
-    def test_process_enc_line(self, mock_single_fixed: Mock, mock_fixed_ranges: Mock):
-        """Test processing of encoding lines"""
-        # Setup mock return values
-        mock_fixed_ranges.findall.return_value = [(6, 2, "0x0D")]
-        mock_fixed_ranges.sub.return_value = "rd imm20"
-        mock_single_fixed.findall.return_value = []
-        mock_single_fixed.sub.return_value = "rd imm20"
-
-        # Create a mock for split() that returns the expected list
-        mock_split = Mock(return_value=["rd", "imm20"])
-        mock_single_fixed.sub.return_value = Mock(split=mock_split)
-
-        name, data = process_enc_line("lui rd imm20 6..2=0x0D", "rv_i")
-
-        self.assertEqual(name, "lui")
-        self.assertEqual(data["extension"], ["rv_i"])
-        self.assertIn("rd", data["variable_fields"])
-        self.assertIn("imm20", data["variable_fields"])
-
-    @patch("os.path.exists")
-    @patch("shared_utils.logging.error")
-    def test_find_extension_file(self, mock_logging: Mock, mock_exists: Mock):
-        """Test extension file finding"""
-        # Test successful case - file exists in main directory
-        mock_exists.side_effect = [True, False]
-        result = find_extension_file("rv32i", "/path/to/opcodes")
-        self.assertEqual(result, "/path/to/opcodes/rv32i")
-
-        # Test successful case - file exists in unratified directory
-        mock_exists.side_effect = [False, True]
-        result = find_extension_file("rv32i", "/path/to/opcodes")
-        self.assertEqual(result, "/path/to/opcodes/unratified/rv32i")
-
-        # Test failure case - file doesn't exist anywhere
-        mock_exists.side_effect = [False, False]
-        with self.assertRaises(SystemExit):
-            find_extension_file("rv32i", "/path/to/opcodes")
-        mock_logging.assert_called_with("Extension rv32i not found.")
-
-    def test_process_standard_instructions(self):
-        """Test processing of standard instructions"""
-        lines = [
-            "add rd rs1 rs2 31..25=0 14..12=0 6..2=0x0C 1..0=3",
-            "sub rd rs1 rs2 31..25=0x20 14..12=0 6..2=0x0C 1..0=3",
-            "$pseudo add_pseudo rd rs1 rs2",  # Should be skipped
-            "$import rv32i::mul",  # Should be skipped
-        ]
-
-        instr_dict: InstrDict = {}
-        file_name = "rv32i"
-
-        with patch("shared_utils.process_enc_line") as mock_process_enc:
-            # Setup mock return values
-            mock_process_enc.side_effect = [
-                ("add", {"extension": ["rv32i"], "encoding": "encoding1"}),
-                ("sub", {"extension": ["rv32i"], "encoding": "encoding2"}),
-            ]
-
-            process_standard_instructions(lines, instr_dict, file_name)
-
-            # Verify process_enc_line was called twice (skipping pseudo and import)
-            self.assertEqual(mock_process_enc.call_count, 2)
-
-            # Verify the instruction dictionary was updated correctly
-            self.assertEqual(len(instr_dict), 2)
-            self.assertIn("add", instr_dict)
-            self.assertIn("sub", instr_dict)
-
-
-if __name__ == "__main__":
-    unittest.main()
+
+
+# Split the instruction line into name and remaining part
+def parse_instruction_line(line: str) -> "tuple[str, str]":
+    """Parse the instruction name and the remaining encoding details."""
+    name, remaining = line.split(" ", 1)
+    name = name.replace(".", "_")  # Replace dots for compatibility
+    remaining = remaining.lstrip()  # Remove leading whitespace
+    return name, remaining
+
+
+# Verify Overlapping Bits
+def check_overlapping_bits(encoding: "list[str]", ind: int, line: str):
+    """Check for overlapping bits in the encoding."""
+    if encoding[31 - ind] != "-":
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes'
+        )
+
+
+# Update encoding for fixed ranges
+def update_encoding_for_fixed_range(
+    encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str
+):
+    """
+    Update encoding bits for a given bit range.
+    Checks for overlapping bits and assigns the value accordingly.
+ """ + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding, ind, line) + bit = str((entry_value >> (ind - lsb)) & 1) + encoding[31 - ind] = bit + + +# Process fixed bit patterns +def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str): + """Process fixed bit ranges in the encoding.""" + for s2, s1, entry in fixed_ranges.findall(remaining): + msb, lsb, entry_value = int(s2), int(s1), int(entry, 0) + + # Validate bit range and entry value + validate_bit_range(msb, lsb, entry_value, line) + update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line) + + return fixed_ranges.sub(" ", remaining) + + +# Process single bit assignments +def process_single_fixed(remaining: str, encoding: "list[str]", line: str): + """Process single fixed assignments in the encoding.""" + for lsb, value, _drop in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + + check_overlapping_bits(encoding, lsb, line) + encoding[31 - lsb] = str(value) + + +# Main function to check argument look-up table +def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str): + """Check if arguments are present in arg_lut.""" + for arg in args: + if arg not in arg_lut: + arg = handle_arg_lut_mapping(arg, name) + msb, lsb = arg_lut[arg] + update_encoding_args(encoding_args, arg, msb, lsb) + + +# Handle missing argument mappings +def handle_arg_lut_mapping(arg: str, name: str): + """Handle cases where an argument needs to be mapped to an existing one.""" + parts = arg.split("=") + if len(parts) == 2: + existing_arg, _new_arg = parts + if existing_arg in arg_lut: + arg_lut[arg] = arg_lut[existing_arg] + else: + log_and_exit( + f" Found field {existing_arg} in variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + else: + log_and_exit( + f" Found variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + return arg + + +# Update encoding args with variables +def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int): + """Update encoding arguments and ensure no overlapping.""" + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding_args, ind, arg) + encoding_args[31 - ind] = arg + + +# Compute match and mask +def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]": + """Convert the encoding list to match and mask strings.""" + match = "".join(encoding).replace("-", "0") + mask = "".join(encoding).replace("0", "1").replace("-", "0") + return hex(int(match, 2)), hex(int(mask, 2)) + + +class SingleInstr(TypedDict): + encoding: str + variable_fields: "list[str]" + extension: "list[str]" + match: str + mask: str + + +InstrDict = Dict[str, SingleInstr] + + +# Processing main function for a line in the encoding file +def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]": + """ + This function processes each line of the encoding files (rv*). As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. 
+
+    If the above checks pass, then the function returns a tuple of the name and
+    a dictionary containing basic information of the instruction which includes:
+    - variables: list of arguments used by the instruction whose mapping
+      exists in the arg_lut dictionary
+    - encoding: this contains the 32-bit encoding of the instruction where
+      '-' is used to represent position of arguments and 1/0 is used to
+      represent the static encoding of the bits
+    - extension: this field contains the rv* filename from which this
+      instruction was included
+    - match: hex value representing the bits that need to match to detect
+      this instruction
+    - mask: hex value representing the bits that need to be masked to extract
+      the value required for matching.
+    """
+    encoding = initialize_encoding()
+
+    # Parse the instruction line
+    name, remaining = parse_instruction_line(line)
+
+    # Process fixed ranges
+    remaining = process_fixed_ranges(remaining, encoding, line)
+
+    # Process single fixed assignments
+    process_single_fixed(remaining, encoding, line)
+
+    # Convert the list of encodings into a match and mask
+    match, mask = convert_encoding_to_match_mask(encoding)
+
+    # Check arguments in arg_lut
+    args = single_fixed.sub(" ", remaining).split()
+    encoding_args = encoding.copy()
+
+    check_arg_lut(args, encoding_args, name)
+
+    # Return single_dict
+    return name, {
+        "encoding": "".join(encoding),
+        "variable_fields": args,
+        "extension": [os.path.basename(ext)],
+        "match": match,
+        "mask": mask,
+    }
+
+
+# Extract ISA Type
+def extract_isa_type(ext_name: str) -> str:
+    """Extracts the ISA type from the extension name."""
+    return ext_name.split("_")[0]
+
+
+# Verify the types for RV*
+def is_rv_variant(type1: str, type2: str) -> bool:
+    """Checks if the types are RV variants (rv32/rv64)."""
+    return (type2 == "rv" and type1 in {"rv32", "rv64"}) or (
+        type1 == "rv" and type2 in {"rv32", "rv64"}
+    )
+
+
+# Check for same base ISA
+def has_same_base_isa(type1: str, type2: str) -> bool:
+    """Determines if the two ISA types share the same base."""
+    return type1 == type2 or is_rv_variant(type1, type2)
+
+
+# Compare the base ISA type of a given extension name against a list of extension names
+def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool:
+    """Checks if the base ISA type of ext_name matches any in ext_name_list."""
+    type1 = extract_isa_type(ext_name)
+    return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list)
+
+
+# Pad two strings to equal length
+def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]":
+    """Pads two strings to equal length using the given padding character."""
+    max_len = max(len(str1), len(str2))
+    return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char)
+
+
+# Check compatibility for two characters
+def has_no_conflict(char1: str, char2: str) -> bool:
+    """Checks if two characters are compatible (either matching or don't-care)."""
+    return char1 == "-" or char2 == "-" or char1 == char2
+
+
+# Conflict check between two encoded strings
+def overlaps(x: str, y: str) -> bool:
+    """Checks if two encoded strings overlap without conflict."""
+    x, y = pad_to_equal_length(x, y)
+    return all(has_no_conflict(x[i], y[i]) for i in range(len(x)))
+
+
+# Check presence of keys in dictionary.
+def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool:
+    """Checks if key2 exists in the dictionary under key1."""
+    return key1 in a and key2 in a[key1]
+
+
+# Overlap allowance
+def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool:
+    """Determines if overlap is allowed between x and y based on nested dictionary checks"""
+    return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x)
+
+
+# Check overlap allowance between extensions
+def extension_overlap_allowed(x: str, y: str) -> bool:
+    """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary."""
+    return overlap_allowed(overlapping_extensions, x, y)
+
+
+# Check overlap allowance between instructions
+def instruction_overlap_allowed(x: str, y: str) -> bool:
+    """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary."""
+    return overlap_allowed(overlapping_instructions, x, y)
+
+
+# Check 'nf' field
+def is_segmented_instruction(instruction: SingleInstr) -> bool:
+    """Checks if an instruction contains the 'nf' field."""
+    return "nf" in instruction["variable_fields"]
+
+
+# Expand 'nf' fields
+def update_with_expanded_instructions(
+    updated_dict: InstrDict, key: str, value: SingleInstr
+):
+    """Expands 'nf' fields in the instruction dictionary and updates it with new instructions."""
+    for new_key, new_value in expand_nf_field(key, value):
+        updated_dict[new_key] = new_value
+
+
+# Process instructions, expanding segmented ones and updating the dictionary
+def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict:
+    """Processes instructions, expanding segmented ones and updating the dictionary."""
+    # Flatten the per-instruction expansions back into a single dictionary
+    return dict(
+        chain.from_iterable(
+            (
+                expand_nf_field(key, value)
+                if is_segmented_instruction(value)
+                else [(key, value)]
+            )
+            for key, value in instr_dict.items()
+        )
+    )
+
+
+# Expand the 'nf' field in the instruction dictionary
+def expand_nf_field(
+    name: str, single_dict: SingleInstr
+) -> "list[tuple[str, SingleInstr]]":
+    """Validate and prepare the instruction dictionary."""
+    validate_nf_field(single_dict, name)
+    remove_nf_field(single_dict)
+    update_mask(single_dict)
+
+    name_expand_index = name.find("e")
+
+    # Precompute the base match value and encoding prefix
+    base_match = int(single_dict["match"], 16)
+    encoding_prefix = single_dict["encoding"][3:]
+
+    expanded_instructions = [
+        create_expanded_instruction(
+            name, single_dict, nf, name_expand_index, base_match, encoding_prefix
+        )
+        for nf in range(8)  # Range of 0 to 7
+    ]
+
+    return expanded_instructions
+
+
+# Validate the presence of 'nf'
+def validate_nf_field(single_dict: SingleInstr, name: str):
+    """Validates the presence of 'nf' in variable fields before expansion."""
+    if "nf" not in single_dict["variable_fields"]:
+        log_and_exit(f"Cannot expand nf field for instruction {name}")
+
+
+# Remove 'nf' from variable fields
+def remove_nf_field(single_dict: SingleInstr):
+    """Removes 'nf' from variable fields in the instruction dictionary."""
+    single_dict["variable_fields"].remove("nf")
+
+
+# Update the mask to include the 'nf' field
+def update_mask(single_dict: SingleInstr):
+    """Updates the mask to include the 'nf' field in the instruction dictionary."""
+    single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29)
+
+
+# Create an expanded instruction
+def create_expanded_instruction(
+    name: str,
+    single_dict: SingleInstr,
+    nf: int,
+    name_expand_index: int,
+    base_match: int,
+    encoding_prefix: str,
+) -> "tuple[str, SingleInstr]":
+    """Creates an expanded instruction based on 'nf' value."""
+    new_single_dict = copy.deepcopy(single_dict)
+
+    # Update match value in one step
+    new_single_dict["match"] = hex(base_match | (nf << 29))
+    new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix
+
+    # Construct new instruction name
+    new_name = (
+        name
+        if nf == 0
+        else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}"
+    )
+
+    return (new_name, new_single_dict)
+
+
+# Return a list of relevant lines from the specified file
+def read_lines(file: str) -> "list[str]":
+    """Reads lines from a file and returns non-blank, non-comment lines."""
+    with open(file) as fp:
+        lines = (line.rstrip() for line in fp)
+        return [line for line in lines if line and not line.startswith("#")]
+
+
+# Update the instruction dictionary
+def process_standard_instructions(
+    lines: "list[str]", instr_dict: InstrDict, file_name: str
+):
+    """Processes standard instructions from the given lines and updates the instruction dictionary."""
+    for line in lines:
+        if "$import" in line or "$pseudo" in line:
+            continue
+        logging.debug(f"Processing line: {line}")
+        name, single_dict = process_enc_line(line, file_name)
+        ext_name = os.path.basename(file_name)
+
+        if name in instr_dict:
+            var = instr_dict[name]["extension"]
+            if same_base_isa(ext_name, var):
+                log_and_exit(
+                    f"Instruction {name} from {ext_name} is already added from {var} in same base ISA"
+                )
+            elif instr_dict[name]["encoding"] != single_dict["encoding"]:
+                log_and_exit(
+                    f"Instruction {name} from {ext_name} has different encodings in different base ISAs"
+                )
+
+            instr_dict[name]["extension"].extend(single_dict["extension"])
+        else:
+            for key, item in instr_dict.items():
+                if (
+                    overlaps(item["encoding"], single_dict["encoding"])
+                    and not extension_overlap_allowed(ext_name, item["extension"][0])
+                    and not instruction_overlap_allowed(name, key)
+                    and same_base_isa(ext_name, item["extension"])
+                ):
+                    log_and_exit(
+                        f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}'
+                    )
+
+            instr_dict[name] = single_dict
+
+
+# Incorporate pseudo instructions into the instruction dictionary based on given conditions
+def process_pseudo_instructions(
+    lines: "list[str]",
+    instr_dict: InstrDict,
+    file_name: str,
+    opcodes_dir: str,
+    include_pseudo: bool,
+    include_pseudo_ops: "list[str]",
+):
+    """Processes pseudo instructions from the given lines and updates the instruction dictionary."""
+    for line in lines:
+        if "$pseudo" not in line:
+            continue
+        logging.debug(f"Processing pseudo line: {line}")
+        ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0]
+        ext_file = find_extension_file(ext, opcodes_dir)
+
+        validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst)
+
+        name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name)
+        if (
+            orig_inst.replace(".", "_") not in instr_dict
+            or include_pseudo
+            or name in include_pseudo_ops
+        ):
+            if name not in instr_dict:
+                instr_dict[name] = single_dict
+                logging.debug(f"Including pseudo_op: {name}")
+            else:
+                if single_dict["match"] != instr_dict[name]["match"]:
+                    instr_dict[f"{name}_pseudo"] = single_dict
+                # TODO: This expression is always false since both sides are list[str].
+ elif single_dict["extension"] not in instr_dict[name]["extension"]: # type: ignore + instr_dict[name]["extension"].extend(single_dict["extension"]) + + +# Integrate imported instructions into the instruction dictionary +def process_imported_instructions( + lines: "list[str]", instr_dict: InstrDict, file_name: str, opcodes_dir: str +): + """Processes imported instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" not in line: + continue + logging.debug(f"Processing imported line: {line}") + import_ext, reg_instr = imported_regex.findall(line)[0] + ext_file = find_extension_file(import_ext, opcodes_dir) + + validate_instruction_in_extension(reg_instr, ext_file, file_name, line) + + for oline in open(ext_file): + if re.findall(f"^\\s*{reg_instr}\\s+", oline): + name, single_dict = process_enc_line(oline, file_name) + if name in instr_dict: + if instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings" + ) + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + instr_dict[name] = single_dict + break + + +# Locate the path of the specified extension file, checking fallback directories +def find_extension_file(ext: str, opcodes_dir: str): + """Finds the extension file path, considering the unratified directory if necessary.""" + ext_file = f"{opcodes_dir}/{ext}" + if not os.path.exists(ext_file): + ext_file = f"{opcodes_dir}/unratified/{ext}" + if not os.path.exists(ext_file): + log_and_exit(f"Extension {ext} not found.") + return ext_file + + +# Confirm the presence of an original instruction in the corresponding extension file. +def validate_instruction_in_extension( + inst: str, ext_file: str, file_name: str, pseudo_inst: str +): + """Validates if the original instruction exists in the dependent extension.""" + found = False + for oline in open(ext_file): + if re.findall(f"^\\s*{inst}\\s+", oline): + found = True + break + if not found: + log_and_exit( + f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}" + ) + + +# Construct a dictionary of instructions filtered by specified criteria +def create_inst_dict( + file_filter: "list[str]", + include_pseudo: bool = False, + include_pseudo_ops: "list[str]" = [], +) -> InstrDict: + """Creates a dictionary of instructions based on the provided file filters.""" + + """ + This function return a dictionary containing all instructions associated + with an extension defined by the file_filter input. + Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. + Each node of the dictionary will correspond to an instruction which again is + a dictionary. The dictionary contents of each instruction includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. 
+
+    In order to build this dictionary, the function does 2 passes over the same
+    rv file:
+    - First pass: extracts all standard instructions, skipping pseudo ops
+      and imported instructions. For each selected line, the `process_enc_line`
+      function is called to create the dictionary contents of the instruction.
+      Checks are performed to ensure that the same instruction is not added
+      twice to the overall dictionary.
+    - Second pass: parses only pseudo_ops. For each pseudo_op, the function:
+      - Checks if the dependent extension and instruction exist.
+      - Adds the pseudo_op to the dictionary if the dependent instruction
+        is not already present; otherwise, it is skipped.
+    """
+    opcodes_dir = os.path.dirname(os.path.realpath(__file__))
+    instr_dict: InstrDict = {}
+
+    file_names = [
+        file
+        for fil in file_filter
+        for file in sorted(glob.glob(f"{opcodes_dir}/{fil}"), reverse=True)
+    ]
+
+    logging.debug("Collecting standard instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for standard instructions")
+        lines = read_lines(file_name)
+        process_standard_instructions(lines, instr_dict, file_name)
+
+    logging.debug("Collecting pseudo instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for pseudo instructions")
+        lines = read_lines(file_name)
+        process_pseudo_instructions(
+            lines,
+            instr_dict,
+            file_name,
+            opcodes_dir,
+            include_pseudo,
+            include_pseudo_ops,
+        )
+
+    logging.debug("Collecting imported instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for imported instructions")
+        lines = read_lines(file_name)
+        process_imported_instructions(lines, instr_dict, file_name, opcodes_dir)
+
+    return instr_dict
+
+
+# Extracts the extensions used in an instruction dictionary
+def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]":
+    return list({item["extension"][0] for item in instr_dict.values()})
+
+
+# Returns signed interpretation of a value within a given width
+def signed(value: int, width: int) -> int:
+    return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)
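Reviewer note, not part of the patch: downstream tooling that used to yaml.safe_load the old instr_dict.yaml can now read the output with the standard library alone. The sketch below is a minimal illustration of consuming the new instr_dict.json using the match/mask semantics documented in process_enc_line above; it assumes parse.py has already been run in the current directory, and the decode() helper is hypothetical, not part of this repository.

import json
from typing import Optional

# Load the dictionary written by the patched parse.py.
with open("instr_dict.json") as f:
    instr_dict = json.load(f)

def decode(word: int) -> Optional[str]:
    """Return the first instruction whose fixed bits match the given word."""
    for name, spec in instr_dict.items():
        # "match" and "mask" are hex strings, e.g. "0x37" and "0x7f" for lui:
        # an encoding matches when word & mask == match.
        if word & int(spec["mask"], 16) == int(spec["match"], 16):
            return name
    return None

print(decode(0x00000537))  # 0x00000537 encodes `lui a0, 0x0`, so this prints "lui"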
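Similarly illustrative (and equally hypothetical): the naming rule that add_segmented_vls_insn and create_expanded_instruction apply when fanning a segmented vector load/store out over nf = 0..7 can be reproduced in isolation as follows.

def expanded_names(name: str) -> "list[str]":
    # Mirrors create_expanded_instruction: nf == 0 keeps the original name;
    # nf > 0 splices "seg<nf + 1>" in front of the element-width marker 'e'.
    i = name.find("e")
    return [
        name if nf == 0 else f"{name[:i]}seg{nf + 1}{name[i:]}" for nf in range(8)
    ]

print(expanded_names("vle8_v"))
# ['vle8_v', 'vlseg2e8_v', 'vlseg3e8_v', 'vlseg4e8_v', 'vlseg5e8_v',
#  'vlseg6e8_v', 'vlseg7e8_v', 'vlseg8e8_v']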