diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 4aac843c..95c39fad 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -15,9 +15,8 @@ jobs:
       uses: actions/setup-python@v2
       with:
         python-version: 3.8
-    - name: Install PyYAML
+    - name: Install Coverage
       run: |
-        pip3 install -r requirements.txt
         pip3 install coverage
     - name: Run pre-commit
       run: |
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index a30716ac..0eb0c898 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -15,9 +15,8 @@ jobs:
       uses: actions/setup-python@v2
       with:
        python-version: 3.8
-    - name: Install PyYAML
+    - name: Install Coverage
       run: |
-        pip3 install -r requirements.txt
        pip3 install coverage
    - name: Test error outputs
      run: coverage run -m unittest -b
diff --git a/.gitignore b/.gitignore
index 4ddba6b5..13806158 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,6 @@
 priv-instr-table.tex
 inst.rs
 inst.spinalhdl
 inst.sverilog
-instr_dict.yaml
+instr_dict.json
 __pycache__/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cca4b3b9..bd41c169 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,13 +26,8 @@ repos:
   #   rev: v3.3.1
   #   hooks:
   #   - id: pylint
-  #     additional_dependencies:
-  #     - "pyyaml==6.0.2"
-  # TODO: Enable this when types are added.
-  # - repo: https://github.com/RobertCraigie/pyright-python
-  #   rev: v1.1.383
-  #   hooks:
-  #   - id: pyright
-  #     additional_dependencies:
-  #     - "pyyaml==6.0.2"
+  - repo: https://github.com/RobertCraigie/pyright-python
+    rev: v1.1.383
+    hooks:
+    - id: pyright
diff --git a/README.md b/README.md
index e6702ed6..e7a988cc 100644
--- a/README.md
+++ b/README.md
@@ -126,9 +126,11 @@
 of extensions are being processed such that the *base-instruction* is not included

 The following artifacts can be generated using parse.py:

-- instr\_dict.yaml : This is file generated always by parse.py and contains the
-  entire main dictionary `instr\_dict` in YAML format. Note, in this yaml the
-  *dots* in an instruction are replaced with *underscores*
+- instr\_dict.json : This is always generated by parse.py and contains the
+  entire main dictionary `instr\_dict` in JSON format. Note, in this file the
+  *dots* in an instruction are replaced with *underscores*. In previous
+  versions of this project the generated file was instr\_dict.yaml. Note that
+  JSON is a subset of YAML so the file can still be read by any YAML parser.
 - encoding.out.h : this is the header file that is used by tools like spike, pk, etc
 - instr-table.tex : the latex table of instructions used in the riscv-unpriv spec
 - priv-instr-table.tex : the latex table of instruction used in the riscv-priv spec
@@ -138,14 +140,6 @@ The following artifacts can be generated using parse.py:
 - inst.spinalhdl : spinalhdl code to decode instructions
 - inst.go : go code to decode instructions

-Make sure you install the required python pre-requisites are installed by executing the following
-command:
-
-```
-sudo apt-get install python-pip3
-pip3 install -r requirements.txt
-```
-
 To generate all the above artifacts for all instructions currently checked in, simply run `make`
 from the root-directory. This should print the following log on the command-line:
 ```
@@ -220,6 +214,6 @@ DEBUG:: Processing line: bne bimm12hi rs1 rs2 bimm12lo 14..12=1 6..2=0x

 ## How do I find where an instruction is defined?
 You can use `grep "^\s*<instr_name>" rv* unratified/rv*` OR run `make` and open
-`instr_dict.yaml` and search of the instruction you are looking for. Within that
-instruction the `extension` field will indicate which file the instruction was
-picked from.
+`instr_dict.json` and search for the instruction you are looking for. Within
+that instruction the `extension` field will indicate which file the
+instruction was picked from.
diff --git a/c_utils.py b/c_utils.py
index 40fa4cb4..0ebc6c2d 100644
--- a/c_utils.py
+++ b/c_utils.py
@@ -8,7 +8,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_c(instr_dict):
+def make_c(instr_dict: InstrDict):
     mask_match_str = ""
     declare_insn_str = ""
     for i in instr_dict:
diff --git a/chisel_utils.py b/chisel_utils.py
index 0943584d..9916b76c 100644
--- a/chisel_utils.py
+++ b/chisel_utils.py
@@ -10,7 +10,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_chisel(instr_dict, spinal_hdl=False):
+def make_chisel(instr_dict: InstrDict, spinal_hdl: bool = False):

     chisel_names = ""
     cause_names_str = ""
@@ -31,7 +31,7 @@ def make_chisel(instr_dict, spinal_hdl=False):
             elif "rv_" in e:
                 e_format = e.replace("rv_", "").upper()
             else:
-                e_format = e.upper
+                e_format = e.upper()
             chisel_names += f'  val {e_format+"Type"} = Map(\n'
             for instr in e_instrs:
                 tmp_instr_name = '"' + instr.upper().replace(".", "_") + '"'
diff --git a/constants.py b/constants.py
index cff9c043..4b608e35 100644
--- a/constants.py
+++ b/constants.py
@@ -1,6 +1,7 @@
 import csv
 import re

+# TODO: The constants in this file should be in all caps.
 overlapping_extensions = {
     "rv_zcmt": {"rv_c_d"},
     "rv_zcmp": {"rv_c_d"},
@@ -21,29 +22,29 @@

 # regex to find <msb>..<lsb>=<val> patterns in instruction
 fixed_ranges = re.compile(
-    "\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M
+    r"\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M
 )

 # regex to find <lsb>=<val> patterns in instructions
 # single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M)
-single_fixed = re.compile("(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M)
+single_fixed = re.compile(r"(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M)

 # regex to find the overloading condition variable
-var_regex = re.compile("(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M)
+var_regex = re.compile(r"(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M)

 # regex for pseudo op instructions returns the dependent filename, dependent
 # instruction, the pseudo op name and the encoding string
 pseudo_regex = re.compile(
-    "^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<pseudo_encoding>.*)$",
+    r"^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<pseudo_encoding>.*)$",
     re.M,
 )

 imported_regex = re.compile(
-    "^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M
+    r"^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M
 )


-def read_csv(filename):
+def read_csv(filename: str):
     """
     Reads a CSV file and returns a list of tuples.
     Each tuple contains an integer value (from the first column)
     and a string (from the second column).
@@ -79,126 +80,99 @@ def read_csv(filename):

 # dictionary containing the mapping of the argument to the what the fields in
 # the latex table should be
-latex_mapping = {}
-latex_mapping["imm12"] = "imm[11:0]"
-latex_mapping["rs1"] = "rs1"
-latex_mapping["rs2"] = "rs2"
-latex_mapping["rd"] = "rd"
-latex_mapping["imm20"] = "imm[31:12]"
-latex_mapping["bimm12hi"] = "imm[12$\\vert$10:5]"
-latex_mapping["bimm12lo"] = "imm[4:1$\\vert$11]"
-latex_mapping["imm12hi"] = "imm[11:5]"
-latex_mapping["imm12lo"] = "imm[4:0]"
-latex_mapping["jimm20"] = "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]"
-latex_mapping["zimm"] = "uimm"
-latex_mapping["shamtw"] = "shamt"
-latex_mapping["shamtd"] = "shamt"
-latex_mapping["shamtq"] = "shamt"
-latex_mapping["rd_p"] = "rd\\,$'$"
-latex_mapping["rs1_p"] = "rs1\\,$'$"
-latex_mapping["rs2_p"] = "rs2\\,$'$"
-latex_mapping["rd_rs1_n0"] = "rd/rs$\\neq$0"
-latex_mapping["rd_rs1_p"] = "rs1\\,$'$/rs2\\,$'$"
-latex_mapping["c_rs2"] = "rs2"
-latex_mapping["c_rs2_n0"] = "rs2$\\neq$0"
-latex_mapping["rd_n0"] = "rd$\\neq$0"
-latex_mapping["rs1_n0"] = "rs1$\\neq$0"
-latex_mapping["c_rs1_n0"] = "rs1$\\neq$0"
-latex_mapping["rd_rs1"] = "rd/rs1"
-latex_mapping["zimm6hi"] = "uimm[5]"
-latex_mapping["zimm6lo"] = "uimm[4:0]"
-latex_mapping["c_nzuimm10"] = "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]"
-latex_mapping["c_uimm7lo"] = "uimm[2$\\vert$6]"
-latex_mapping["c_uimm7hi"] = "uimm[5:3]"
-latex_mapping["c_uimm8lo"] = "uimm[7:6]"
-latex_mapping["c_uimm8hi"] = "uimm[5:3]"
-latex_mapping["c_uimm9lo"] = "uimm[7:6]"
-latex_mapping["c_uimm9hi"] = "uimm[5:4$\\vert$8]"
-latex_mapping["c_nzimm6lo"] = "nzimm[4:0]"
-latex_mapping["c_nzimm6hi"] = "nzimm[5]"
-latex_mapping["c_imm6lo"] = "imm[4:0]"
-latex_mapping["c_imm6hi"] = "imm[5]"
-latex_mapping["c_nzimm10hi"] = "nzimm[9]"
-latex_mapping["c_nzimm10lo"] = "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]"
-latex_mapping["c_nzimm18hi"] = "nzimm[17]"
-latex_mapping["c_nzimm18lo"] = "nzimm[16:12]"
-latex_mapping["c_imm12"] = (
-    "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]"
-)
-latex_mapping["c_bimm9lo"] = "imm[7:6$\\vert$2:1$\\vert$5]"
-latex_mapping["c_bimm9hi"] = "imm[8$\\vert$4:3]"
-latex_mapping["c_nzuimm5"] = "nzuimm[4:0]"
-latex_mapping["c_nzuimm6lo"] = "nzuimm[4:0]"
-latex_mapping["c_nzuimm6hi"] = "nzuimm[5]"
-latex_mapping["c_uimm8splo"] = "uimm[4:2$\\vert$7:6]"
-latex_mapping["c_uimm8sphi"] = "uimm[5]"
-latex_mapping["c_uimm8sp_s"] = "uimm[5:2$\\vert$7:6]"
-latex_mapping["c_uimm10splo"] = "uimm[4$\\vert$9:6]"
-latex_mapping["c_uimm10sphi"] = "uimm[5]"
-latex_mapping["c_uimm9splo"] = "uimm[4:3$\\vert$8:6]"
-latex_mapping["c_uimm9sphi"] = "uimm[5]"
-latex_mapping["c_uimm10sp_s"] = "uimm[5:4$\\vert$9:6]"
-latex_mapping["c_uimm9sp_s"] = "uimm[5:3$\\vert$8:6]"
+latex_mapping = {
+    "imm12": "imm[11:0]",
+    "rs1": "rs1",
+    "rs2": "rs2",
+    "rd": "rd",
+    "imm20": "imm[31:12]",
+    "bimm12hi": "imm[12$\\vert$10:5]",
+    "bimm12lo": "imm[4:1$\\vert$11]",
+    "imm12hi": "imm[11:5]",
+    "imm12lo": "imm[4:0]",
+    "jimm20": "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]",
+    "zimm": "uimm",
+    "shamtw": "shamt",
+    "shamtd": "shamt",
+    "shamtq": "shamt",
+    "rd_p": "rd\\,$'$",
+    "rs1_p": "rs1\\,$'$",
+    "rs2_p": "rs2\\,$'$",
+    "rd_rs1_n0": "rd/rs$\\neq$0",
+    "rd_rs1_p": "rs1\\,$'$/rs2\\,$'$",
+    "c_rs2": "rs2",
+    "c_rs2_n0": "rs2$\\neq$0",
+    "rd_n0": "rd$\\neq$0",
+    "rs1_n0": "rs1$\\neq$0",
+    "c_rs1_n0": "rs1$\\neq$0",
+    "rd_rs1": "rd/rs1",
+    "zimm6hi": "uimm[5]",
+    "zimm6lo": "uimm[4:0]",
+    "c_nzuimm10": "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]",
+    "c_uimm7lo": "uimm[2$\\vert$6]",
+    "c_uimm7hi": "uimm[5:3]",
+    "c_uimm8lo": "uimm[7:6]",
+    "c_uimm8hi": "uimm[5:3]",
+    "c_uimm9lo": "uimm[7:6]",
+    "c_uimm9hi": "uimm[5:4$\\vert$8]",
+    "c_nzimm6lo": "nzimm[4:0]",
+    "c_nzimm6hi": "nzimm[5]",
+    "c_imm6lo": "imm[4:0]",
+    "c_imm6hi": "imm[5]",
+    "c_nzimm10hi": "nzimm[9]",
+    "c_nzimm10lo": "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]",
+    "c_nzimm18hi": "nzimm[17]",
+    "c_nzimm18lo": "nzimm[16:12]",
+    "c_imm12": "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]",
+    "c_bimm9lo": "imm[7:6$\\vert$2:1$\\vert$5]",
+    "c_bimm9hi": "imm[8$\\vert$4:3]",
+    "c_nzuimm5": "nzuimm[4:0]",
+    "c_nzuimm6lo": "nzuimm[4:0]",
+    "c_nzuimm6hi": "nzuimm[5]",
+    "c_uimm8splo": "uimm[4:2$\\vert$7:6]",
+    "c_uimm8sphi": "uimm[5]",
+    "c_uimm8sp_s": "uimm[5:2$\\vert$7:6]",
+    "c_uimm10splo": "uimm[4$\\vert$9:6]",
+    "c_uimm10sphi": "uimm[5]",
+    "c_uimm9splo": "uimm[4:3$\\vert$8:6]",
+    "c_uimm9sphi": "uimm[5]",
+    "c_uimm10sp_s": "uimm[5:4$\\vert$9:6]",
+    "c_uimm9sp_s": "uimm[5:3$\\vert$8:6]",
+}

 # created a dummy instruction-dictionary like dictionary for all the instruction
 # types so that the same logic can be used to create their tables
-latex_inst_type = {}
-latex_inst_type["R-type"] = {}
-latex_inst_type["R-type"]["variable_fields"] = [
-    "opcode",
-    "rd",
-    "funct3",
-    "rs1",
-    "rs2",
-    "funct7",
-]
-latex_inst_type["R4-type"] = {}
-latex_inst_type["R4-type"]["variable_fields"] = [
-    "opcode",
-    "rd",
-    "funct3",
-    "rs1",
-    "rs2",
-    "funct2",
-    "rs3",
-]
-latex_inst_type["I-type"] = {}
-latex_inst_type["I-type"]["variable_fields"] = [
-    "opcode",
-    "rd",
-    "funct3",
-    "rs1",
-    "imm12",
-]
-latex_inst_type["S-type"] = {}
-latex_inst_type["S-type"]["variable_fields"] = [
-    "opcode",
-    "imm12lo",
-    "funct3",
-    "rs1",
-    "rs2",
-    "imm12hi",
-]
-latex_inst_type["B-type"] = {}
-latex_inst_type["B-type"]["variable_fields"] = [
-    "opcode",
-    "bimm12lo",
-    "funct3",
-    "rs1",
-    "rs2",
-    "bimm12hi",
+latex_inst_type = {
+    "R-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct7"],
+    },
+    "R4-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct2", "rs3"],
+    },
+    "I-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "imm12"],
+    },
+    "S-type": {
+        "variable_fields": ["opcode", "imm12lo", "funct3", "rs1", "rs2", "imm12hi"],
+    },
+    "B-type": {
+        "variable_fields": ["opcode", "bimm12lo", "funct3", "rs1", "rs2", "bimm12hi"],
+    },
+    "U-type": {
+        "variable_fields": ["opcode", "rd", "imm20"],
+    },
+    "J-type": {
+        "variable_fields": ["opcode", "rd", "jimm20"],
+    },
+}
+latex_fixed_fields = [
+    (31, 25),
+    (24, 20),
+    (19, 15),
+    (14, 12),
+    (11, 7),
+    (6, 0),
 ]
-latex_inst_type["U-type"] = {}
-latex_inst_type["U-type"]["variable_fields"] = ["opcode", "rd", "imm20"]
-latex_inst_type["J-type"] = {}
-latex_inst_type["J-type"]["variable_fields"] = ["opcode", "rd", "jimm20"]
-latex_fixed_fields = []
-latex_fixed_fields.append((31, 25))
-latex_fixed_fields.append((24, 20))
-latex_fixed_fields.append((19, 15))
-latex_fixed_fields.append((14, 12))
-latex_fixed_fields.append((11, 7))
-latex_fixed_fields.append((6, 0))

 # Pseudo-ops present in the generated encodings.
 # By default pseudo-ops are not listed as they are considered aliases
diff --git a/go_utils.py b/go_utils.py
index 9815e702..88cb34b0 100644
--- a/go_utils.py
+++ b/go_utils.py
@@ -8,7 +8,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_go(instr_dict):
+def make_go(instr_dict: InstrDict):

     args = " ".join(sys.argv)
     prelude = f"""// Code generated by {args}; DO NOT EDIT."""
diff --git a/latex_utils.py b/latex_utils.py
index ab5f6f92..f1bf5321 100644
--- a/latex_utils.py
+++ b/latex_utils.py
@@ -1,13 +1,6 @@
-import collections
-import copy
-import glob
 import logging
-import os
 import pprint
-import re
-import sys
-
-import yaml
+from typing import TextIO

 from constants import *
 from shared_utils import *
@@ -119,7 +112,9 @@ def make_latex_table():
     # instructions listed in list_of_instructions will be dumped into latex.
     caption = ""
     type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"]
-    dataset_list = [(["_i", "32_i"], "RV32I Base Instruction Set", [], False)]
+    dataset_list: list[tuple[list[str], str, list[str], bool]] = [
+        (["_i", "32_i"], "RV32I Base Instruction Set", [], False)
+    ]
     dataset_list.append((["_i"], "", ["fence_tso", "pause"], True))
     make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
@@ -186,7 +181,13 @@ def make_latex_table():
     latex_file.close()


-def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
+def make_ext_latex_table(
+    type_list: "list[str]",
+    dataset: "list[tuple[list[str], str, list[str], bool]]",
+    latex_file: TextIO,
+    ilen: int,
+    caption: str,
+):
     """
     For a given collection of extensions this function dumps out a complete
     latex table which includes the encodings of the instructions.
@@ -287,7 +288,7 @@ def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
     # iterate ovr each instruction type and create a table entry
     for t in type_dict:
-        fields = []
+        fields: list[tuple[int, int, str]] = []

         # first capture all "arguments" of the type (funct3, funct7, rd, etc)
         # and capture their positions using arg_lut.
@@ -334,7 +335,7 @@ def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
     # for each entry in the dataset create a table
     content = ""
     for ext_list, title, filter_list, include_pseudo in dataset:
-        instr_dict = {}
+        instr_dict: InstrDict = {}

         # for all extensions list in ext_list, create a dictionary of
         # instructions associated with those extensions.
diff --git a/parse.py b/parse.py
index 9677ed6d..4339e81d 100755
--- a/parse.py
+++ b/parse.py
@@ -1,19 +1,18 @@
 #!/usr/bin/env python3
 import collections
+import json
 import logging
 import pprint
 import sys

-import yaml
-
-from c_utils import *
-from chisel_utils import *
-from constants import *
-from go_utils import *
-from latex_utils import *
-from rust_utils import *
-from shared_utils import *
-from sverilog_utils import *
+from c_utils import make_c
+from chisel_utils import make_chisel
+from constants import emitted_pseudo_ops
+from go_utils import make_go
+from latex_utils import make_latex_table, make_priv_latex_table
+from rust_utils import make_rust
+from shared_utils import add_segmented_vls_insn, create_inst_dict
+from sverilog_utils import make_sverilog

 LOG_FORMAT = "%(levelname)s:: %(message)s"
 LOG_LEVEL = logging.INFO
@@ -21,7 +20,8 @@
 pretty_printer = pprint.PrettyPrinter(indent=2)
 logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)

-if __name__ == "__main__":
+
+def main():
     print(f"Running with args : {sys.argv}")

     extensions = sys.argv[1:]
@@ -44,8 +44,8 @@
     instr_dict = create_inst_dict(extensions, include_pseudo)

-    with open("instr_dict.yaml", "w") as outfile:
-        yaml.dump(add_segmented_vls_insn(instr_dict), outfile, default_flow_style=False)
+    with open("instr_dict.json", "w") as outfile:
+        json.dump(add_segmented_vls_insn(instr_dict), outfile, indent=2)

     instr_dict = collections.OrderedDict(sorted(instr_dict.items()))

     if "-c" in sys.argv[1:]:
@@ -81,3 +81,7 @@
     logging.info("instr-table.tex generated successfully")
     make_priv_latex_table()
     logging.info("priv-instr-table.tex generated successfully")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyrightconfig.json b/pyrightconfig.json
new file mode 100644
index 00000000..67b99de7
--- /dev/null
+++ b/pyrightconfig.json
@@ -0,0 +1,4 @@
+{
+    "typeCheckingMode": "strict",
+    "pythonVersion": "3.6"
+}
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index c3726e8b..00000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyyaml
diff --git a/rust_utils.py b/rust_utils.py
index 68e0c8cd..ad160e8a 100644
--- a/rust_utils.py
+++ b/rust_utils.py
@@ -10,7 +10,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_rust(instr_dict):
+def make_rust(instr_dict: InstrDict):
     mask_match_str = ""
     for i in instr_dict:
         mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
diff --git a/sverilog_utils.py b/sverilog_utils.py
index ff116ccb..a3b75712 100644
--- a/sverilog_utils.py
+++ b/sverilog_utils.py
@@ -7,7 +7,7 @@
 logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")


-def make_sverilog(instr_dict):
+def make_sverilog(instr_dict: InstrDict):
     names_str = ""
     for i in instr_dict:
         names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
diff --git a/test.py b/test.py
index aa091b72..34482b1b 100644
--- a/test.py
+++ b/test.py
@@ -1,239 +1,608 @@
 #!/usr/bin/env python3
-
+import copy
+import glob
 import logging
-import unittest
-from unittest.mock import Mock, patch
-
-from shared_utils import *
-
-
-class EncodingUtilsTest(unittest.TestCase):
-    """Tests for basic encoding utilities"""
+import os
+import pprint
+import re
+from itertools import chain
+from typing import Dict, TypedDict
+
+from constants import *
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+# Log an error message and exit
+def log_and_exit(message: str):
+    """Log an error message and exit the program."""
+    logging.error(message)
+    raise SystemExit(1)
+
+
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-
+# Initialize encoding to 32-bit '-' values
+def initialize_encoding(bits: int = 32) -> "list[str]":
+    """Initialize encoding with '-' to represent don't care bits."""
+    return ["-"] * bits
+
+
-    def test_initialize_encoding(self):
-        """Test encoding initialization with different bit lengths"""
-        self.assertEqual(initialize_encoding(32), ["-"] * 32)
-        self.assertEqual(initialize_encoding(16), ["-"] * 16)
-        self.assertEqual(initialize_encoding(), ["-"] * 32)  # default case
-
-    def test_validate_bit_range(self):
-        """Test bit range validation"""
-        # Valid cases
-        validate_bit_range(7, 3, 15, "test_instr")  # 15 fits in 5 bits
-        validate_bit_range(31, 0, 0xFFFFFFFF, "test_instr")  # max 32-bit value
-
-        # Invalid cases
-        with self.assertRaises(SystemExit):
-            validate_bit_range(3, 7, 1, "test_instr")  # msb < lsb
-        with self.assertRaises(SystemExit):
-            validate_bit_range(3, 0, 16, "test_instr")  # value too large for range
+# Validate bit range and value
+def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str):
+    """Validate the bit range and entry value."""
+    if msb < lsb:
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding'
+        )
+
+    if entry_value >= (1 << (msb - lsb + 1)):
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb + 1}'
+        )
+
-
-    def test_parse_instruction_line(self):
-        """Test instruction line parsing"""
-        name, remaining = parse_instruction_line("add.w r1, r2, r3")
-        self.assertEqual(name, "add_w")
-        self.assertEqual(remaining, "r1, r2, r3")
-
-        name, remaining = parse_instruction_line("lui rd imm20 6..2=0x0D")
-        self.assertEqual(name, "lui")
-        self.assertEqual(remaining, "rd imm20 6..2=0x0D")
-
-
-class BitManipulationTest(unittest.TestCase):
-    """Tests for bit manipulation and checking functions"""
-
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-        self.test_encoding = initialize_encoding()
-
-    def test_check_overlapping_bits(self):
-        """Test overlapping bits detection"""
-        # Valid case - no overlap
-        self.test_encoding[31 - 5] = "-"
-        check_overlapping_bits(self.test_encoding, 5, "test_instr")
-
-        # Invalid case - overlap
-        self.test_encoding[31 - 5] = "1"
-        with self.assertRaises(SystemExit):
-            check_overlapping_bits(self.test_encoding, 5, "test_instr")
-
-    def test_update_encoding_for_fixed_range(self):
-        """Test encoding updates for fixed ranges"""
-        encoding = initialize_encoding()
-        update_encoding_for_fixed_range(encoding, 6, 2, 0x0D, "test_instr")
-
-        # Check specific bits are set correctly
-        self.assertEqual(encoding[31 - 6 : 31 - 1], ["0", "1", "1", "0", "1"])
-
-    def test_process_fixed_ranges(self):
-        """Test processing of fixed bit ranges"""
-        encoding = initialize_encoding()
-        remaining = "rd imm20 6..2=0x0D 1..0=3"
-
-        result = process_fixed_ranges(remaining, encoding, "test_instr")
-        self.assertNotIn("6..2=0x0D", result)
-        self.assertNotIn("1..0=3", result)
-
-
-class EncodingArgsTest(unittest.TestCase):
-    """Tests for encoding arguments handling"""
-
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-
-    @patch.dict("shared_utils.arg_lut", {"rd": (11, 7), "rs1": (19, 15)})
-    def test_check_arg_lut(self):
-        """Test argument lookup table checking"""
-        encoding_args = initialize_encoding()
-        args = ["rd", "rs1"]
-        check_arg_lut(args, encoding_args, "test_instr")
-
-        # Verify encoding_args has been updated correctly
-        self.assertEqual(encoding_args[31 - 11 : 31 - 6], ["rd"] * 5)
-        self.assertEqual(encoding_args[31 - 19 : 31 - 14], ["rs1"] * 5)
-
-    @patch.dict("shared_utils.arg_lut", {"rs1": (19, 15)})
-    def test_handle_arg_lut_mapping(self):
-        """Test handling of argument mappings"""
-        # Valid mapping
-        result = handle_arg_lut_mapping("rs1=new_arg", "test_instr")
-        self.assertEqual(result, "rs1=new_arg")
-
-        # Invalid mapping
-        with self.assertRaises(SystemExit):
-            handle_arg_lut_mapping("invalid_arg=new_arg", "test_instr")
-
-
-class ISAHandlingTest(unittest.TestCase):
-    """Tests for ISA type handling and validation"""
-
-    def test_extract_isa_type(self):
-        """Test ISA type extraction"""
-        self.assertEqual(extract_isa_type("rv32_i"), "rv32")
-        self.assertEqual(extract_isa_type("rv64_m"), "rv64")
-        self.assertEqual(extract_isa_type("rv_c"), "rv")
-
-    def test_is_rv_variant(self):
-        """Test RV variant checking"""
-        self.assertTrue(is_rv_variant("rv32", "rv"))
-        self.assertTrue(is_rv_variant("rv", "rv64"))
-        self.assertFalse(is_rv_variant("rv32", "rv64"))
-
-    def test_same_base_isa(self):
-        """Test base ISA comparison"""
-        self.assertTrue(same_base_isa("rv32_i", ["rv32_m", "rv32_a"]))
-        self.assertTrue(same_base_isa("rv_i", ["rv32_i", "rv64_i"]))
-        self.assertFalse(same_base_isa("rv32_i", ["rv64_m"]))
-
-
-class StringManipulationTest(unittest.TestCase):
-    """Tests for string manipulation utilities"""
-
-    def test_pad_to_equal_length(self):
-        """Test string padding"""
-        str1, str2 = pad_to_equal_length("101", "1101")
-        self.assertEqual(len(str1), len(str2))
-        self.assertEqual(str1, "-101")
-        self.assertEqual(str2, "1101")
-
-    def test_overlaps(self):
-        """Test string overlap checking"""
-        self.assertTrue(overlaps("1-1", "101"))
-        self.assertTrue(overlaps("---", "101"))
-        self.assertFalse(overlaps("111", "101"))
-
-
-class InstructionProcessingTest(unittest.TestCase):
-    """Tests for instruction processing and validation"""
-
-    def setUp(self):
-        self.logger = logging.getLogger()
-        self.logger.disabled = True
-        # Create a patch for arg_lut
-        self.arg_lut_patcher = patch.dict(
-            "shared_utils.arg_lut", {"rd": (11, 7), "imm20": (31, 12)}
-        )
-        self.arg_lut_patcher.start()
-
-    def tearDown(self):
-        self.arg_lut_patcher.stop()
-
-    @patch("shared_utils.fixed_ranges")
-    @patch("shared_utils.single_fixed")
-    def test_process_enc_line(self, mock_single_fixed: Mock, mock_fixed_ranges: Mock):
-        """Test processing of encoding lines"""
-        # Setup mock return values
-        mock_fixed_ranges.findall.return_value = [(6, 2, "0x0D")]
-        mock_fixed_ranges.sub.return_value = "rd imm20"
-        mock_single_fixed.findall.return_value = []
-        mock_single_fixed.sub.return_value = "rd imm20"
-
-        # Create a mock for split() that returns the expected list
-        mock_split = Mock(return_value=["rd", "imm20"])
-        mock_single_fixed.sub.return_value = Mock(split=mock_split)
-
-        name, data = process_enc_line("lui rd imm20 6..2=0x0D", "rv_i")
-
-        self.assertEqual(name, "lui")
-        self.assertEqual(data["extension"], ["rv_i"])
-        self.assertIn("rd", data["variable_fields"])
-        self.assertIn("imm20", data["variable_fields"])
-
-    @patch("os.path.exists")
-    @patch("shared_utils.logging.error")
-    def test_find_extension_file(self, mock_logging: Mock, mock_exists: Mock):
-        """Test extension file finding"""
-        # Test successful case - file exists in main directory
-        mock_exists.side_effect = [True, False]
-        result = find_extension_file("rv32i", "/path/to/opcodes")
-        self.assertEqual(result, "/path/to/opcodes/rv32i")
-
-        # Test successful case - file exists in unratified directory
-        mock_exists.side_effect = [False, True]
-        result = find_extension_file("rv32i", "/path/to/opcodes")
-        self.assertEqual(result, "/path/to/opcodes/unratified/rv32i")
-
-        # Test failure case - file doesn't exist anywhere
-        mock_exists.side_effect = [False, False]
-        with self.assertRaises(SystemExit):
-            find_extension_file("rv32i", "/path/to/opcodes")
-        mock_logging.assert_called_with("Extension rv32i not found.")
-
-    def test_process_standard_instructions(self):
-        """Test processing of standard instructions"""
-        lines = [
-            "add rd rs1 rs2 31..25=0 14..12=0 6..2=0x0C 1..0=3",
-            "sub rd rs1 rs2 31..25=0x20 14..12=0 6..2=0x0C 1..0=3",
-            "$pseudo add_pseudo rd rs1 rs2",  # Should be skipped
-            "$import rv32i::mul",  # Should be skipped
-        ]
-
-        instr_dict: InstrDict = {}
-        file_name = "rv32i"
-
-        with patch("shared_utils.process_enc_line") as mock_process_enc:
-            # Setup mock return values
-            mock_process_enc.side_effect = [
-                ("add", {"extension": ["rv32i"], "encoding": "encoding1"}),
-                ("sub", {"extension": ["rv32i"], "encoding": "encoding2"}),
-            ]
-
-            process_standard_instructions(lines, instr_dict, file_name)
-
-            # Verify process_enc_line was called twice (skipping pseudo and import)
-            self.assertEqual(mock_process_enc.call_count, 2)
-
-            # Verify the instruction dictionary was updated correctly
-            self.assertEqual(len(instr_dict), 2)
-            self.assertIn("add", instr_dict)
-            self.assertIn("sub", instr_dict)
-
-
-if __name__ == "__main__":
-    unittest.main()
+
+
+# Split the instruction line into name and remaining part
+def parse_instruction_line(line: str) -> "tuple[str, str]":
+    """Parse the instruction name and the remaining encoding details."""
+    name, remaining = line.split(" ", 1)
+    name = name.replace(".", "_")  # Replace dots for compatibility
+    remaining = remaining.lstrip()  # Remove leading whitespace
+    return name, remaining
+
+
+# Verify Overlapping Bits
+def check_overlapping_bits(encoding: "list[str]", ind: int, line: str):
+    """Check for overlapping bits in the encoding."""
+    if encoding[31 - ind] != "-":
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes'
+        )
+
+
+# Update encoding for fixed ranges
+def update_encoding_for_fixed_range(
+    encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str
+):
+    """
+    Update encoding bits for a given bit range.
+    Checks for overlapping bits and assigns the value accordingly.
+ """ + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding, ind, line) + bit = str((entry_value >> (ind - lsb)) & 1) + encoding[31 - ind] = bit + + +# Process fixed bit patterns +def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str): + """Process fixed bit ranges in the encoding.""" + for s2, s1, entry in fixed_ranges.findall(remaining): + msb, lsb, entry_value = int(s2), int(s1), int(entry, 0) + + # Validate bit range and entry value + validate_bit_range(msb, lsb, entry_value, line) + update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line) + + return fixed_ranges.sub(" ", remaining) + + +# Process single bit assignments +def process_single_fixed(remaining: str, encoding: "list[str]", line: str): + """Process single fixed assignments in the encoding.""" + for lsb, value, _drop in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + + check_overlapping_bits(encoding, lsb, line) + encoding[31 - lsb] = str(value) + + +# Main function to check argument look-up table +def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str): + """Check if arguments are present in arg_lut.""" + for arg in args: + if arg not in arg_lut: + arg = handle_arg_lut_mapping(arg, name) + msb, lsb = arg_lut[arg] + update_encoding_args(encoding_args, arg, msb, lsb) + + +# Handle missing argument mappings +def handle_arg_lut_mapping(arg: str, name: str): + """Handle cases where an argument needs to be mapped to an existing one.""" + parts = arg.split("=") + if len(parts) == 2: + existing_arg, _new_arg = parts + if existing_arg in arg_lut: + arg_lut[arg] = arg_lut[existing_arg] + else: + log_and_exit( + f" Found field {existing_arg} in variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + else: + log_and_exit( + f" Found variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + return arg + + +# Update encoding args with variables +def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int): + """Update encoding arguments and ensure no overlapping.""" + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding_args, ind, arg) + encoding_args[31 - ind] = arg + + +# Compute match and mask +def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]": + """Convert the encoding list to match and mask strings.""" + match = "".join(encoding).replace("-", "0") + mask = "".join(encoding).replace("0", "1").replace("-", "0") + return hex(int(match, 2)), hex(int(mask, 2)) + + +class SingleInstr(TypedDict): + encoding: str + variable_fields: "list[str]" + extension: "list[str]" + match: str + mask: str + + +InstrDict = Dict[str, SingleInstr] + + +# Processing main function for a line in the encoding file +def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]": + """ + This function processes each line of the encoding files (rv*). As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. 
+
+    If the above checks pass, then the function returns a tuple of the name and
+    a dictionary containing basic information of the instruction which includes:
+    - variables: list of arguments used by the instruction whose mapping
+      exists in the arg_lut dictionary
+    - encoding: this contains the 32-bit encoding of the instruction where
+      '-' is used to represent position of arguments and 1/0 is used to
+      represent the static encoding of the bits
+    - extension: this field contains the rv* filename from which this
+      instruction was included
+    - match: hex value representing the bits that need to match to detect
+      this instruction
+    - mask: hex value representing the bits that need to be masked to extract
+      the value required for matching.
+    """
+    encoding = initialize_encoding()
+
+    # Parse the instruction line
+    name, remaining = parse_instruction_line(line)
+
+    # Process fixed ranges
+    remaining = process_fixed_ranges(remaining, encoding, line)
+
+    # Process single fixed assignments
+    process_single_fixed(remaining, encoding, line)
+
+    # Convert the list of encodings into a match and mask
+    match, mask = convert_encoding_to_match_mask(encoding)
+
+    # Check arguments in arg_lut
+    args = single_fixed.sub(" ", remaining).split()
+    encoding_args = encoding.copy()
+
+    check_arg_lut(args, encoding_args, name)
+
+    # Return single_dict
+    return name, {
+        "encoding": "".join(encoding),
+        "variable_fields": args,
+        "extension": [os.path.basename(ext)],
+        "match": match,
+        "mask": mask,
+    }
+
+
+# Extract ISA Type
+def extract_isa_type(ext_name: str) -> str:
+    """Extracts the ISA type from the extension name."""
+    return ext_name.split("_")[0]
+
+
+# Verify the types for RV*
+def is_rv_variant(type1: str, type2: str) -> bool:
+    """Checks if the types are RV variants (rv32/rv64)."""
+    return (type2 == "rv" and type1 in {"rv32", "rv64"}) or (
+        type1 == "rv" and type2 in {"rv32", "rv64"}
+    )
+
+
+# Check for same base ISA
+def has_same_base_isa(type1: str, type2: str) -> bool:
+    """Determines if the two ISA types share the same base."""
+    return type1 == type2 or is_rv_variant(type1, type2)
+
+
+# Compare the base ISA type of a given extension name against a list of extension names
+def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool:
+    """Checks if the base ISA type of ext_name matches any in ext_name_list."""
+    type1 = extract_isa_type(ext_name)
+    return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list)
+
+
+# Pad two strings to equal length
+def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]":
+    """Pads two strings to equal length using the given padding character."""
+    max_len = max(len(str1), len(str2))
+    return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char)
+
+
+# Check compatibility for two characters
+def has_no_conflict(char1: str, char2: str) -> bool:
+    """Checks if two characters are compatible (either matching or don't-care)."""
+    return char1 == "-" or char2 == "-" or char1 == char2
+
+
+# Conflict check between two encoded strings
+def overlaps(x: str, y: str) -> bool:
+    """Checks if two encoded strings overlap without conflict."""
+    x, y = pad_to_equal_length(x, y)
+    return all(has_no_conflict(x[i], y[i]) for i in range(len(x)))
+
+
+# Check presence of keys in dictionary.
+def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool:
+    """Checks if key2 exists in the dictionary under key1."""
+    return key1 in a and key2 in a[key1]
+
+
+# Overlap allowance
+def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool:
+    """Determines if overlap is allowed between x and y based on nested dictionary checks"""
+    return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x)
+
+
+# Check overlap allowance between extensions
+def extension_overlap_allowed(x: str, y: str) -> bool:
+    """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary."""
+    return overlap_allowed(overlapping_extensions, x, y)
+
+
+# Check overlap allowance between instructions
+def instruction_overlap_allowed(x: str, y: str) -> bool:
+    """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary."""
+    return overlap_allowed(overlapping_instructions, x, y)
+
+
+# Check 'nf' field
+def is_segmented_instruction(instruction: SingleInstr) -> bool:
+    """Checks if an instruction contains the 'nf' field."""
+    return "nf" in instruction["variable_fields"]
+
+
+# Expand 'nf' fields
+def update_with_expanded_instructions(
+    updated_dict: InstrDict, key: str, value: SingleInstr
+):
+    """Expands 'nf' fields in the instruction dictionary and updates it with new instructions."""
+    for new_key, new_value in expand_nf_field(key, value):
+        updated_dict[new_key] = new_value
+
+
+# Process instructions, expanding segmented ones and updating the dictionary
+def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict:
+    """Processes instructions, expanding segmented ones and updating the dictionary."""
+    # Flatten the per-instruction expansions back into a single dictionary
+    return dict(
+        chain.from_iterable(
+            (
+                expand_nf_field(key, value)
+                if is_segmented_instruction(value)
+                else [(key, value)]
+            )
+            for key, value in instr_dict.items()
+        )
+    )
+
+
+# Expand the 'nf' field in the instruction dictionary
+def expand_nf_field(
+    name: str, single_dict: SingleInstr
+) -> "list[tuple[str, SingleInstr]]":
+    """Validate and prepare the instruction dictionary."""
+    validate_nf_field(single_dict, name)
+    remove_nf_field(single_dict)
+    update_mask(single_dict)
+
+    name_expand_index = name.find("e")
+
+    # Precompute the base match value and encoding prefix
+    base_match = int(single_dict["match"], 16)
+    encoding_prefix = single_dict["encoding"][3:]
+
+    expanded_instructions = [
+        create_expanded_instruction(
+            name, single_dict, nf, name_expand_index, base_match, encoding_prefix
+        )
+        for nf in range(8)  # Range of 0 to 7
+    ]
+
+    return expanded_instructions
+
+
+# Validate the presence of 'nf'
+def validate_nf_field(single_dict: SingleInstr, name: str):
+    """Validates the presence of 'nf' in variable fields before expansion."""
+    if "nf" not in single_dict["variable_fields"]:
+        log_and_exit(f"Cannot expand nf field for instruction {name}")
+
+
+# Remove 'nf' from variable fields
+def remove_nf_field(single_dict: SingleInstr):
+    """Removes 'nf' from variable fields in the instruction dictionary."""
+    single_dict["variable_fields"].remove("nf")
+
+
+# Update the mask to include the 'nf' field
+def update_mask(single_dict: SingleInstr):
+    """Updates the mask to include the 'nf' field in the instruction dictionary."""
+    single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29)
+
+
+# Create an expanded instruction
+def create_expanded_instruction(
+    name: str,
+    single_dict: SingleInstr,
+    nf: int,
+    name_expand_index: int,
+    base_match: int,
+    encoding_prefix: str,
+) -> "tuple[str, SingleInstr]":
+    """Creates an expanded instruction based on 'nf' value."""
+    new_single_dict = copy.deepcopy(single_dict)
+
+    # Update match value in one step
+    new_single_dict["match"] = hex(base_match | (nf << 29))
+    new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix
+
+    # Construct new instruction name
+    new_name = (
+        name
+        if nf == 0
+        else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}"
+    )
+
+    return (new_name, new_single_dict)
+
+
+# Return a list of relevant lines from the specified file
+def read_lines(file: str) -> "list[str]":
+    """Reads lines from a file and returns non-blank, non-comment lines."""
+    with open(file) as fp:
+        lines = (line.rstrip() for line in fp)
+        return [line for line in lines if line and not line.startswith("#")]
+
+
+# Update the instruction dictionary
+def process_standard_instructions(
+    lines: "list[str]", instr_dict: InstrDict, file_name: str
+):
+    """Processes standard instructions from the given lines and updates the instruction dictionary."""
+    for line in lines:
+        if "$import" in line or "$pseudo" in line:
+            continue
+        logging.debug(f"Processing line: {line}")
+        name, single_dict = process_enc_line(line, file_name)
+        ext_name = os.path.basename(file_name)
+
+        if name in instr_dict:
+            var = instr_dict[name]["extension"]
+            if same_base_isa(ext_name, var):
+                log_and_exit(
+                    f"Instruction {name} from {ext_name} is already added from {var} in same base ISA"
+                )
+            elif instr_dict[name]["encoding"] != single_dict["encoding"]:
+                log_and_exit(
+                    f"Instruction {name} from {ext_name} has different encodings in different base ISAs"
+                )
+
+            instr_dict[name]["extension"].extend(single_dict["extension"])
+        else:
+            for key, item in instr_dict.items():
+                if (
+                    overlaps(item["encoding"], single_dict["encoding"])
+                    and not extension_overlap_allowed(ext_name, item["extension"][0])
+                    and not instruction_overlap_allowed(name, key)
+                    and same_base_isa(ext_name, item["extension"])
+                ):
+                    log_and_exit(
+                        f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}'
+                    )
+
+            instr_dict[name] = single_dict
+
+
+# Incorporate pseudo instructions into the instruction dictionary based on given conditions
+def process_pseudo_instructions(
+    lines: "list[str]",
+    instr_dict: InstrDict,
+    file_name: str,
+    opcodes_dir: str,
+    include_pseudo: bool,
+    include_pseudo_ops: "list[str]",
+):
+    """Processes pseudo instructions from the given lines and updates the instruction dictionary."""
+    for line in lines:
+        if "$pseudo" not in line:
+            continue
+        logging.debug(f"Processing pseudo line: {line}")
+        ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0]
+        ext_file = find_extension_file(ext, opcodes_dir)
+
+        validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst)
+
+        name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name)
+        if (
+            orig_inst.replace(".", "_") not in instr_dict
+            or include_pseudo
+            or name in include_pseudo_ops
+        ):
+            if name not in instr_dict:
+                instr_dict[name] = single_dict
+                logging.debug(f"Including pseudo_op: {name}")
+            else:
+                if single_dict["match"] != instr_dict[name]["match"]:
+                    instr_dict[f"{name}_pseudo"] = single_dict
+                # TODO: This expression is always false since both sides are list[str].
+ elif single_dict["extension"] not in instr_dict[name]["extension"]: # type: ignore + instr_dict[name]["extension"].extend(single_dict["extension"]) + + +# Integrate imported instructions into the instruction dictionary +def process_imported_instructions( + lines: "list[str]", instr_dict: InstrDict, file_name: str, opcodes_dir: str +): + """Processes imported instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" not in line: + continue + logging.debug(f"Processing imported line: {line}") + import_ext, reg_instr = imported_regex.findall(line)[0] + ext_file = find_extension_file(import_ext, opcodes_dir) + + validate_instruction_in_extension(reg_instr, ext_file, file_name, line) + + for oline in open(ext_file): + if re.findall(f"^\\s*{reg_instr}\\s+", oline): + name, single_dict = process_enc_line(oline, file_name) + if name in instr_dict: + if instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings" + ) + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + instr_dict[name] = single_dict + break + + +# Locate the path of the specified extension file, checking fallback directories +def find_extension_file(ext: str, opcodes_dir: str): + """Finds the extension file path, considering the unratified directory if necessary.""" + ext_file = f"{opcodes_dir}/{ext}" + if not os.path.exists(ext_file): + ext_file = f"{opcodes_dir}/unratified/{ext}" + if not os.path.exists(ext_file): + log_and_exit(f"Extension {ext} not found.") + return ext_file + + +# Confirm the presence of an original instruction in the corresponding extension file. +def validate_instruction_in_extension( + inst: str, ext_file: str, file_name: str, pseudo_inst: str +): + """Validates if the original instruction exists in the dependent extension.""" + found = False + for oline in open(ext_file): + if re.findall(f"^\\s*{inst}\\s+", oline): + found = True + break + if not found: + log_and_exit( + f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}" + ) + + +# Construct a dictionary of instructions filtered by specified criteria +def create_inst_dict( + file_filter: "list[str]", + include_pseudo: bool = False, + include_pseudo_ops: "list[str]" = [], +) -> InstrDict: + """Creates a dictionary of instructions based on the provided file filters.""" + + """ + This function return a dictionary containing all instructions associated + with an extension defined by the file_filter input. + Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. + Each node of the dictionary will correspond to an instruction which again is + a dictionary. The dictionary contents of each instruction includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. 
+
+    In order to build this dictionary, the function does 2 passes over the same
+    rv file:
+    - First pass: extracts all standard instructions, skipping pseudo ops
+      and imported instructions. For each selected line, the `process_enc_line`
+      function is called to create the dictionary contents of the instruction.
+      Checks are performed to ensure that the same instruction is not added
+      twice to the overall dictionary.
+    - Second pass: parses only pseudo_ops. For each pseudo_op, the function:
+      - Checks if the dependent extension and instruction exist.
+      - Adds the pseudo_op to the dictionary if the dependent instruction
+        is not already present; otherwise, it is skipped.
+    """
+    opcodes_dir = os.path.dirname(os.path.realpath(__file__))
+    instr_dict: InstrDict = {}
+
+    file_names = [
+        file
+        for fil in file_filter
+        for file in sorted(glob.glob(f"{opcodes_dir}/{fil}"), reverse=True)
+    ]
+
+    logging.debug("Collecting standard instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for standard instructions")
+        lines = read_lines(file_name)
+        process_standard_instructions(lines, instr_dict, file_name)
+
+    logging.debug("Collecting pseudo instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for pseudo instructions")
+        lines = read_lines(file_name)
+        process_pseudo_instructions(
+            lines,
+            instr_dict,
+            file_name,
+            opcodes_dir,
+            include_pseudo,
+            include_pseudo_ops,
+        )
+
+    logging.debug("Collecting imported instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for imported instructions")
+        lines = read_lines(file_name)
+        process_imported_instructions(lines, instr_dict, file_name, opcodes_dir)
+
+    return instr_dict
+
+
+# Extracts the extensions used in an instruction dictionary
+def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]":
+    return list({item["extension"][0] for item in instr_dict.values()})
+
+
+# Returns signed interpretation of a value within a given width
+def signed(value: int, width: int) -> int:
+    return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)
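Reviewer note, not part of the patch: downstream tooling that used to yaml.safe_load the old instr_dict.yaml can now read the output with the standard library alone. The sketch below is a minimal illustration of consuming the new instr_dict.json using the match/mask semantics documented in process_enc_line above; it assumes parse.py has already been run in the current directory, and the decode() helper is hypothetical, not part of this repository.

import json
from typing import Optional

# Load the dictionary written by the patched parse.py.
with open("instr_dict.json") as f:
    instr_dict = json.load(f)

def decode(word: int) -> Optional[str]:
    """Return the first instruction whose fixed bits match the given word."""
    for name, spec in instr_dict.items():
        # "match" and "mask" are hex strings, e.g. "0x37" and "0x7f" for lui:
        # an encoding matches when word & mask == match.
        if word & int(spec["mask"], 16) == int(spec["match"], 16):
            return name
    return None

print(decode(0x00000537))  # 0x00000537 encodes `lui a0, 0x0`, so this prints "lui"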
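Similarly illustrative (and equally hypothetical): the naming rule that add_segmented_vls_insn and create_expanded_instruction apply when fanning a segmented vector load/store out over nf = 0..7 can be reproduced in isolation as follows.

def expanded_names(name: str) -> "list[str]":
    # Mirrors create_expanded_instruction: nf == 0 keeps the original name;
    # nf > 0 splices "seg<nf + 1>" in front of the element-width marker 'e'.
    i = name.find("e")
    return [
        name if nf == 0 else f"{name[:i]}seg{nf + 1}{name[i:]}" for nf in range(8)
    ]

print(expanded_names("vle8_v"))
# ['vle8_v', 'vlseg2e8_v', 'vlseg3e8_v', 'vlseg4e8_v', 'vlseg5e8_v',
#  'vlseg6e8_v', 'vlseg7e8_v', 'vlseg8e8_v']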