From 15ffef778b2b6739ce2f7346f604c3884fffde85 Mon Sep 17 00:00:00 2001 From: AdUhTkJm <2292398666@qq.com> Date: Fri, 20 Dec 2024 11:24:41 +0800 Subject: [PATCH 1/7] Tidy code and add some documentation --- README.md | 39 +++++++++-- README.zh.md | 30 +++++++-- src/basic_config.ml | 34 +++++----- src/basic_constr_info.ml | 124 ++++++++++++----------------------- src/basic_lst.ml | 57 +++++----------- src/basic_type_path.ml | 91 +++++++++++--------------- src/core_format.ml | 4 +- src/driver_config.ml | 4 +- src/driver_util.ml | 27 +++++--- src/hash.c | 9 ++- src/mi_format.ml | 3 +- src/moon0_main.ml | 94 +++++++++++++++----------- src/name_mangle.ml | 5 +- src/parsing_parse.ml | 2 +- src/riscv.ml | 40 ++++++++++++ src/riscv_reg.ml | 138 +++++++++++++++++++++++++++++++++++++++ src/riscv_ssa.ml | 41 ++++++++++++ src/s.ml | 17 +++-- src/trait_impl.ml | 24 ++----- src/w.ml | 15 +++-- 20 files changed, 497 insertions(+), 301 deletions(-) create mode 100644 src/riscv.ml create mode 100644 src/riscv_reg.ml create mode 100644 src/riscv_ssa.ml diff --git a/README.md b/README.md index 4eebd78..1c0abfb 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Building a programming language is a long journey. It took Rust 9 years and Go 5 - OCaml 4.14.2 - [OPAM](https://opam.ocaml.org/) -You must update MoonBit to the latest version. Otherwise, `moonc` can fail with segmentation fault, since the binaries of the language core is not compatible. +You must update or revert MoonBit to [this version](https://github.com/moonbitlang/core/commit/4660d8b3da6ed79e47462d66d40feff177060699), as the syntax of the language has changed since. ### Build @@ -39,21 +39,44 @@ opam install -y dune dune build -p moonbit-lang ``` +You would also need to build the core library, as instructed in the following section. + ### Usage -MoonBit's core library is typically installed in `~/.moon/lib/core/`. In following commands, we use `$core` to denote the path. Under `$core/target`, there are folders containing pre-built libraries under different targets: `js`, `wasm` and `wasm-gc`. Let `$target` stand for one of these three. +MoonBit's core library is typically installed in `~/.moon/lib/core/`. In following commands, we use `$core` to denote the path. The language is shipped with pre-built libraries under different targets: `js`, `wasm` and `wasm-gc`; however, this compiler currently supports only `wasm-gc`. Let `$target` stand for this value. We use `$src` to denote the path to your main package. This package must contain, along with your source files, a `moon.pkg.json`; if you're not sure how this works, you can use [moon](https://github.com/moonbitlang/moon) to initialize a MoonBit repository. -We use `$obj` to indicate path where object files should be generated; they typically carry a suffix `.core`. We use `$dest` to represent target files, which might be `.js` or `.wasm` according to your target choice. +We use `$obj` to indicate path where object files should be generated; they typically carry suffixes `.core` and `.mi`. + +We use `$dest` to represent target files, which might be `.wat` or `.wasm`, but no other choices are allowed. -Compile files with these commands: +To set up the environment, execute these commands (you only need to do it once): + +```bash +# Remove currently installed MoonBit version +rm -rf $core + +# Install the specific version required by the compiler +git clone https://github.com/moonbitlang/core.git $core +git checkout 4660d8b + +# Compile the core library +moon bundle --source-dir $core +``` + +We strongly recommend that you build the core library yourself via the commands above. The pre-built binaries are not always compatible with this compiler, as MoonBit is still under development. + +You should verify that there is a folder called `wasm-gc` under `$core/target`. + +Now you can compile `.mbt` files with these commands: ```bash bundled=$core/target/$target/release/bundle # Here, main.mbt should be a file containing `fn main`. -moonc build-package $src/main.mbt -is-main -std-path $bundled -o $obj -target $target +# `build-package` produces intermediate representation (IR); it is ignorant of target. +moonc build-package $src/main.mbt -is-main -std-path $bundled -o $obj # If you have more than one package, remember to include all of them in -pkg-sources. They should be separated by colon ':'. moonc link-core $bundled/core.core $obj -o $dest -pkg-config-path $src/moon.pkg.json -pkg-sources $core:$src -target $target @@ -61,6 +84,8 @@ moonc link-core $bundled/core.core $obj -o $dest -pkg-config-path $src/moon.pkg. Then `$dest` would be available for use. +In case you are still in doubt, refer to the output of `moon run --dry-run`. + ## Contributing The project is evolving extremely fast that it is not yet ready for massive community contributions. @@ -68,7 +93,7 @@ The project is evolving extremely fast that it is not yet ready for massive comm If you do have interest in contributing, thank you! Please sign the [CLA](https://www.moonbitlang.com/cla/moonc) first. -For small bug fixes, you are welcome to send the patch to [our email](mailto:jichuruanjian@idea.edu.cn). For large contributions, it is recommended to open a discussion first in our [community forum](https://discuss.moonbitlang.com). +For small bug fixes, you are welcome to send the patch to [our email](mailto:jichuruanjian@idea.edu.cn). For large contributions, it is recommended to open a discussion first in our [community forum](https://discuss.moonbitlang.com). ## LICENSE @@ -89,5 +114,5 @@ In the past two years, our team worked hard to improve MoonBit and its toolchain We are grateful for the support of the community. Special thanks to Jane Street for their excellent PPX libraries, -this repo has used some of their [PPX functions](./src/hash.c). +for this repo has used some of their [PPX functions](./src/hash.c). diff --git a/README.zh.md b/README.zh.md index 1273518..a235198 100644 --- a/README.zh.md +++ b/README.zh.md @@ -27,11 +27,11 @@ - OCaml 4.14.2 - [OPAM](https://opam.ocaml.org/) -MoonBit 必须升级到最新版本,否则 `moonc` 可能会段错误。这是因为预编译的旧版本 MoonBit 核心可能和编译器不兼容。 +MoonBit 必须升级/降级到[特定版本](https://github.com/moonbitlang/core/commit/4660d8b3da6ed79e47462d66d40feff177060699),因为语言的语法已经改变。 ### 构建 -使用下列脚本构建 +使用下列脚本构建: ``` opam switch create 4.14.2 @@ -45,9 +45,29 @@ MoonBit 的核心库一般安装在 `~/.moon/lib/core` 下。在下面的命令 `$src` 表示源代码的路径;在这个文件夹下,除了源代码之外还必须包括一个 `moon.pkg.json`。如果你不清楚如何编写这个文件,可以考虑使用 [moon](https://github.com/moonbitlang/moon) 来初始化。 -`$obj` 表示生成中间文件的位置。这些中间文件通常以 `.core` 作为后缀。而 `$dest` 则表示目标文件生成的路径,它们的后缀根据 `$target` 的选择不同而在 `.js` or `.wasm` 中变化。 +我们用 `$obj` 表示中间文件生成的地方。它们一般以 `.core` 或者 `.mi` 作为后缀。 -编译所需的命令如下: +我们用 `$dest` 表示目标文件生成的地方。它可以是 `.wat` 或 `.wasm`,但不允许其他后缀。 + +为了搭建运行环境,请执行如下命令(只需要执行一次): + +```bash +# 移除已经安装的核心库 +rm -rf $core + +# 安装指定的版本 +git clone https://github.com/moonbitlang/core.git $core +git checkout 4660d8b + +# 编译 +moon bundle --source-dir $core +``` + +我们强烈建议使用上面的命令重新编译一次标准库。已经构建好的二进制文件可能和这个编译器不兼容。 + +执行完成后,你应当能在 `$core/target/` 下发现文件夹 `wasm-gc`。 + +现在你可以使用这些命令来编译 `.mbt` 文件: ```bash bundled=$core/target/$target/release/bundle @@ -61,6 +81,8 @@ moonc link-core $bundled/core.core $obj -o $dest -pkg-config-path $src/moon.pkg. 执行后,`$dest` 就是编译好的目标代码了。 +如果你仍有疑问,可以参考 `moon run --dry-run` 的输出。 + ## 贡献 这个项目正在快速演进,因此还没有准备好接受大量社区贡献。 diff --git a/src/basic_config.ml b/src/basic_config.ml index 121310f..27f5d06 100644 --- a/src/basic_config.ml +++ b/src/basic_config.ml @@ -15,41 +15,37 @@ module Map_string = Basic_map_string -type target = Wasm_gc +type target = Wasm_gc | Riscv include struct - let _ = fun (_ : target) -> () - let sexp_of_target = (function Wasm_gc -> S.Atom "Wasm_gc" : target -> S.t) - let _ = sexp_of_target + let sexp_of_target target = match target with + | Wasm_gc -> S.Atom "Wasm_gc" + | Riscv -> S.Atom "Riscv" - let (hash_fold_target : Ppx_base.state -> target -> Ppx_base.state) = - (fun hsv arg -> Ppx_base.hash_fold_int hsv (match arg with Wasm_gc -> 1) - : Ppx_base.state -> target -> Ppx_base.state) + let hash_fold_target hsv arg = + Ppx_base.hash_fold_int hsv (match arg with + | Wasm_gc -> 1 + | Riscv -> 2) - let _ = hash_fold_target - - let (hash_target : target -> Ppx_base.hash_value) = - let func arg = + let hash_target arg = Ppx_base.get_hash_value - (let hsv = Ppx_base.create () in - hash_fold_target hsv arg) - in - fun x -> func x + (let hsv = Ppx_base.create () in hash_fold_target hsv arg) - let _ = hash_target let equal_target = (Stdlib.( = ) : target -> target -> bool) - let _ = equal_target end type js_format = Esm | Cjs | Iife type error_format = Human | Json -let parse_target_exn = function "wasm-gc" -> Wasm_gc | _ -> assert false +let parse_target_exn = function + | "wasm-gc" -> Wasm_gc + | "riscv" -> Riscv + | other -> raise (Arg.Bad ("unsupported target: " ^ other)) let parse_error_format_exn = function | "human" -> Human | "json" -> Json - | _ -> assert false + | other -> raise (Arg.Bad ("unsupported parse error format: " ^ other)) let mi_magic_str = "MINTF230520" let core_magic_str = "MCORE240123" diff --git a/src/basic_constr_info.ml b/src/basic_constr_info.ml index 3ce7b4a..3df0b3c 100644 --- a/src/basic_constr_info.ml +++ b/src/basic_constr_info.ml @@ -29,98 +29,54 @@ type constr_tag = } include struct - let _ = fun (_ : constr_tag) -> () + let compare_constr_tag a b = + if a == b then 0 + else + match (a, b) with + | Constr_tag_regular a1, Constr_tag_regular b1 -> + Stdlib.compare (a1.index : int) b1.index + | Constr_tag_regular _, _ -> -1 + | _, Constr_tag_regular _ -> 1 + | Extensible_tag a1, Extensible_tag b1 -> ( + match Stdlib.compare a1.pkg b1.pkg with + | 0 -> ( + match Stdlib.compare a1.type_name b1.type_name with + | 0 -> Stdlib.compare a1.name b1.name + | n -> n) + | n -> n) - let compare_constr_tag = - (fun a__001_ b__002_ -> - if Stdlib.( == ) a__001_ b__002_ then 0 - else - match (a__001_, b__002_) with - | Constr_tag_regular _a__003_, Constr_tag_regular _b__004_ -> - Stdlib.compare (_a__003_.index : int) _b__004_.index - | Constr_tag_regular _, _ -> -1 - | _, Constr_tag_regular _ -> 1 - | Extensible_tag _a__005_, Extensible_tag _b__006_ -> ( - match Stdlib.compare (_a__005_.pkg : string) _b__006_.pkg with - | 0 -> ( - match - Stdlib.compare - (_a__005_.type_name : string) - _b__006_.type_name - with - | 0 -> Stdlib.compare (_a__005_.name : string) _b__006_.name - | n -> n) - | n -> n) - : constr_tag -> constr_tag -> int) + let equal_constr_tag a b = + if a == b then true + else + match (a, b) with + | Constr_tag_regular a1, Constr_tag_regular b1 -> + a1.index = b1.index + | Constr_tag_regular _, _ -> false + | _, Constr_tag_regular _ -> false + | Extensible_tag a1, Extensible_tag b1 -> + (a1.pkg = b1.pkg) && + (a1.type_name = b1.type_name) && + (a1.name = b1.name) - let _ = compare_constr_tag - let equal_constr_tag = - (fun a__007_ b__008_ -> - if Stdlib.( == ) a__007_ b__008_ then true - else - match (a__007_, b__008_) with - | Constr_tag_regular _a__009_, Constr_tag_regular _b__010_ -> - Stdlib.( = ) (_a__009_.index : int) _b__010_.index - | Constr_tag_regular _, _ -> false - | _, Constr_tag_regular _ -> false - | Extensible_tag _a__011_, Extensible_tag _b__012_ -> - Stdlib.( && ) - (Stdlib.( = ) (_a__011_.pkg : string) _b__012_.pkg) - (Stdlib.( && ) - (Stdlib.( = ) - (_a__011_.type_name : string) - _b__012_.type_name) - (Stdlib.( = ) (_a__011_.name : string) _b__012_.name)) - : constr_tag -> constr_tag -> bool) + let hash_fold_constr_tag hsv (arg: constr_tag) = + match arg with + | Constr_tag_regular _ir -> + let hsv = Ppx_base.hash_fold_int hsv 0 in + let hsv = Ppx_base.hash_fold_int hsv _ir.index + in hsv + | Extensible_tag _ir -> + let hsv = Ppx_base.hash_fold_int hsv 1 in + let hsv = Ppx_base.hash_fold_string hsv _ir.pkg in + let hsv = Ppx_base.hash_fold_string hsv _ir.type_name in + let hsv = Ppx_base.hash_fold_string hsv _ir.name + in hsv - let _ = equal_constr_tag - - let (hash_fold_constr_tag : Ppx_base.state -> constr_tag -> Ppx_base.state) = - (fun hsv arg -> - match arg with - | Constr_tag_regular _ir -> - let hsv = Ppx_base.hash_fold_int hsv 0 in - let hsv = - let hsv = - let hsv = - let hsv = hsv in - hsv - in - Ppx_base.hash_fold_int hsv _ir.index - in - hsv - in - hsv - | Extensible_tag _ir -> - let hsv = Ppx_base.hash_fold_int hsv 1 in - let hsv = - let hsv = - let hsv = - let hsv = - let hsv = hsv in - Ppx_base.hash_fold_string hsv _ir.pkg - in - Ppx_base.hash_fold_string hsv _ir.type_name - in - Ppx_base.hash_fold_string hsv _ir.name - in - hsv - in - hsv - : Ppx_base.state -> constr_tag -> Ppx_base.state) - - let _ = hash_fold_constr_tag - - let (hash_constr_tag : constr_tag -> Ppx_base.hash_value) = - let func arg = + let hash_constr_tag arg = Ppx_base.get_hash_value (let hsv = Ppx_base.create () in hash_fold_constr_tag hsv arg) - in - fun x -> func x - let _ = hash_constr_tag end let sexp_of_constr_tag (tag : constr_tag) = diff --git a/src/basic_lst.ml b/src/basic_lst.ml index 402b082..c31deb5 100644 --- a/src/basic_lst.ml +++ b/src/basic_lst.ml @@ -12,47 +12,26 @@ . *) +(** +This file is re-implementing basic functionalities of the OCaml standard library `List`. + +The intention is that, while OCaml places the function as the first argument +(like `List.map (fun x -> x + 1) l`), +this might be less readable for longer functions. + +Therefore, we reverse the argument order in Basic_lst. +You could always choose between this version and the standard library version for more readable code. + +Additionally, this file also introduces more facilities than the standard library. +*) module Unsafe_external = Basic_unsafe_external module Arr = Basic_arr open Unsafe_external -let rec map l f = - match l with - | [] -> [] - | x1 :: [] -> - let y1 = f x1 in - [ y1 ] - | [ x1; x2 ] -> - let y1 = f x1 in - let y2 = f x2 in - [ y1; y2 ] - | [ x1; x2; x3 ] -> - let y1 = f x1 in - let y2 = f x2 in - let y3 = f x3 in - [ y1; y2; y3 ] - | [ x1; x2; x3; x4 ] -> - let y1 = f x1 in - let y2 = f x2 in - let y3 = f x3 in - let y4 = f x4 in - [ y1; y2; y3; y4 ] - | x1 :: x2 :: x3 :: x4 :: x5 :: tail -> - let y1 = f x1 in - let y2 = f x2 in - let y3 = f x3 in - let y4 = f x4 in - let y5 = f x5 in - y1 :: y2 :: y3 :: y4 :: y5 :: map tail f +let rec map l f = List.map f l -let rec has_string (l : string list) f = - match l with - | [] -> false - | x1 :: [] -> x1 = f - | [ x1; x2 ] -> x1 = f || x2 = f - | [ x1; x2; x3 ] -> x1 = f || x2 = f || x3 = f - | x1 :: x2 :: x3 :: x4 -> x1 = f || x2 = f || x3 = f || has_string x4 f +let rec has_string (l : string list) query = List.mem query l let rec map_combine l1 l2 f = match (l1, l2) with @@ -825,13 +804,7 @@ let rec mem_string (xs : string list) (x : string) = let rec mem_int (xs : int list) (x : int) = match xs with [] -> false | a :: l -> a = x || mem_int l x -let filter lst p = - let rec find ~p accu lst = - match lst with - | [] -> rev accu - | x :: l -> if p x then find (x :: accu) l ~p else find accu l ~p - in - find [] lst ~p +let filter lst p = List.filter p lst let rec check_duplicate (xs : string list) = match xs with diff --git a/src/basic_type_path.ml b/src/basic_type_path.ml index 8610f5c..7208f96 100644 --- a/src/basic_type_path.ml +++ b/src/basic_type_path.ml @@ -122,46 +122,41 @@ include struct let _ = compare - let rec (hash_fold_t : Ppx_base.state -> t -> Ppx_base.state) = - (fun hsv arg -> - match arg with - | T_unit -> Ppx_base.hash_fold_int hsv 0 - | T_bool -> Ppx_base.hash_fold_int hsv 1 - | T_byte -> Ppx_base.hash_fold_int hsv 2 - | T_char -> Ppx_base.hash_fold_int hsv 3 - | T_int -> Ppx_base.hash_fold_int hsv 4 - | T_int64 -> Ppx_base.hash_fold_int hsv 5 - | T_uint -> Ppx_base.hash_fold_int hsv 6 - | T_uint64 -> Ppx_base.hash_fold_int hsv 7 - | T_float -> Ppx_base.hash_fold_int hsv 8 - | T_double -> Ppx_base.hash_fold_int hsv 9 - | T_string -> Ppx_base.hash_fold_int hsv 10 - | T_option -> Ppx_base.hash_fold_int hsv 11 - | T_result -> Ppx_base.hash_fold_int hsv 12 - | T_error_value_result -> Ppx_base.hash_fold_int hsv 13 - | T_fixedarray -> Ppx_base.hash_fold_int hsv 14 - | T_bytes -> Ppx_base.hash_fold_int hsv 15 - | T_ref -> Ppx_base.hash_fold_int hsv 16 - | T_error -> Ppx_base.hash_fold_int hsv 17 - | Toplevel _ir -> - let hsv = Ppx_base.hash_fold_int hsv 18 in - let hsv = - let hsv = hsv in - Ppx_base.hash_fold_string hsv _ir.pkg - in - Ppx_base.hash_fold_string hsv _ir.id - | Tuple _a0 -> - let hsv = Ppx_base.hash_fold_int hsv 19 in - let hsv = hsv in - Ppx_base.hash_fold_int hsv _a0 - | Constr _ir -> - let hsv = Ppx_base.hash_fold_int hsv 20 in - let hsv = - let hsv = hsv in - hash_fold_t hsv _ir.ty - in - Constr_info.hash_fold_constr_tag hsv _ir.tag - : Ppx_base.state -> t -> Ppx_base.state) + let rec hash_fold_t hsv arg = + match arg with + | T_unit -> Ppx_base.hash_fold_int hsv 0 + | T_bool -> Ppx_base.hash_fold_int hsv 1 + | T_byte -> Ppx_base.hash_fold_int hsv 2 + | T_char -> Ppx_base.hash_fold_int hsv 3 + | T_int -> Ppx_base.hash_fold_int hsv 4 + | T_int64 -> Ppx_base.hash_fold_int hsv 5 + | T_uint -> Ppx_base.hash_fold_int hsv 6 + | T_uint64 -> Ppx_base.hash_fold_int hsv 7 + | T_float -> Ppx_base.hash_fold_int hsv 8 + | T_double -> Ppx_base.hash_fold_int hsv 9 + | T_string -> Ppx_base.hash_fold_int hsv 10 + | T_option -> Ppx_base.hash_fold_int hsv 11 + | T_result -> Ppx_base.hash_fold_int hsv 12 + | T_error_value_result -> Ppx_base.hash_fold_int hsv 13 + | T_fixedarray -> Ppx_base.hash_fold_int hsv 14 + | T_bytes -> Ppx_base.hash_fold_int hsv 15 + | T_ref -> Ppx_base.hash_fold_int hsv 16 + | T_error -> Ppx_base.hash_fold_int hsv 17 + | Toplevel _ir -> + let hsv = Ppx_base.hash_fold_int hsv 18 in + let hsv = + let hsv = hsv in + Ppx_base.hash_fold_string hsv _ir.pkg + in + Ppx_base.hash_fold_string hsv _ir.id + | Tuple _a0 -> + let hsv = Ppx_base.hash_fold_int hsv 19 in + let hsv = hsv in + Ppx_base.hash_fold_int hsv _a0 + | Constr _ir -> + let hsv = Ppx_base.hash_fold_int hsv 20 in + let hsv = hash_fold_t hsv _ir.ty in + Constr_info.hash_fold_constr_tag hsv _ir.tag and (hash : t -> Ppx_base.hash_value) = let func arg = @@ -306,20 +301,8 @@ let sexp_of_t t = let toplevel_type ~pkg t = Toplevel { pkg; id = t } let constr ~ty ~tag = Constr { ty; tag } -let tuple_2 = Tuple 2 -let tuple_3 = Tuple 3 -let tuple_4 = Tuple 4 -let tuple_5 = Tuple 5 -let tuple_6 = Tuple 6 - -let tuple n = - match n with - | 2 -> tuple_2 - | 3 -> tuple_3 - | 4 -> tuple_4 - | 5 -> tuple_5 - | 6 -> tuple_6 - | _ -> Tuple n + +let tuple n = Tuple n let rec get_pkg t = match t with diff --git a/src/core_format.ml b/src/core_format.ml index 5bf512a..0e935f2 100644 --- a/src/core_format.ml +++ b/src/core_format.ml @@ -234,7 +234,7 @@ let import ~(path : string) : t array = try In_channel.with_open_bin path (fun ic -> Stdlib.In_channel.input_all ic |> of_string) - with Sys_error msg -> + with Sys_error _ -> raise (Arg.Bad ("cannot open file: " ^ path)) let dump_serialized_from_t (t : t array) : S.t = @@ -260,7 +260,7 @@ let bundle ~inputs ~path = | Some s when String.equal s magic_str -> (Marshal.from_channel ic : serialized array) | _ -> (raise (Arg.Bad ("invalid MoonBit object file: " ^ path)))) - with Sys_error msg -> + with Sys_error _ -> raise (Arg.Bad ("cannot open file: " ^ path))) |> Array.concat in diff --git a/src/driver_config.ml b/src/driver_config.ml index 4e3c4a1..990808d 100644 --- a/src/driver_config.ml +++ b/src/driver_config.ml @@ -96,10 +96,10 @@ module Linkcore_Opt = struct " output plain style wat" ); ( "-target", Arg.Symbol - ( [ "wasm-gc"; "wasm"; "js"; "native" ], + ( [ "wasm-gc"; "wasm"; "js"; "riscv" ], fun target -> Config.target := Basic_config.parse_target_exn target ), - "set compilation target. available targets: wasm, wasm-gc, js, native" + "set compilation target. available targets: wasm, wasm-gc, js, riscv" ); ("-no-dts", Arg.Clear emit_dts, "Do not emit typescript declaration file"); ( "-js-format", diff --git a/src/driver_util.ml b/src/driver_util.ml index 98ee135..1459657 100644 --- a/src/driver_util.ml +++ b/src/driver_util.ml @@ -46,6 +46,9 @@ type target = clam_callback : clam_passes -> Clam.prog -> unit; sexp_callback : W.t list -> unit; } + | Riscv of { + sexp_callback : Riscv.t list -> unit; + } let parse ~diagnostics ~(debug_tokens : bool) (input : mbt_input) : Parsing_parse.output = @@ -53,7 +56,7 @@ let parse ~diagnostics ~(debug_tokens : bool) (input : mbt_input) : | File_Path path -> (try Parsing_parse.parse ~diagnostics ~debug_tokens ~transform:false path - with Sys_error msg -> + with Sys_error _ -> raise (Arg.Bad ("cannot open file: " ^ path))) | Name_Content (name, content) -> Parsing_parse.impl_of_string ~name ~debug_tokens ~diagnostics @@ -233,13 +236,16 @@ let clam_of_mcore ~(elim_unused_let : bool) (core : Mcore.t) ~clam_callback : Pass_unused_let.unused_let_opt |-> clam_callback `Clam_End -let wasm_gen ~(elim_unused_let : bool) (core : Mcore.t) ~(target : target) = - match target with - | Wasm_gc { clam_callback; _ } -> - core - |> clam_of_mcore ~elim_unused_let ~clam_callback - |> Wasm_of_clam_gc.compile - |> fun sexp -> Wat sexp +let wasm_gen ~(elim_unused_let : bool) (core : Mcore.t) ~clam_callback = + core + |> clam_of_mcore ~elim_unused_let ~clam_callback + |> Wasm_of_clam_gc.compile + |> fun sexp -> Wat sexp + +let riscv_gen (core : Mcore.t) = + core + |> Riscv_ssa.ssa_of_mcore + |> Riscv.regalloc let link_core ~(shrink_wasm : bool) ~(elim_unused_let : bool) ~(core_inputs : core_input Basic_vec.t) @@ -258,12 +264,13 @@ let link_core ~(shrink_wasm : bool) ~(elim_unused_let : bool) (Exported_functions.Export_selected exported_functions) in match target with - | Wasm_gc { sexp_callback; _ } -> ( - let mod_and_callback = mono_core |> wasm_gen ~elim_unused_let ~target in + | Wasm_gc { sexp_callback; clam_callback } -> ( + let mod_and_callback = mono_core |> wasm_gen ~elim_unused_let ~clam_callback in match mod_and_callback with | Wat sexp -> (if shrink_wasm then Pass_shrink_wasm.shrink sexp else sexp) |> sexp_callback) + | Riscv { sexp_callback; _ } -> riscv_gen mono_core |> sexp_callback let gen_test_info ~(diagnostics : Diagnostics.t) ~(json : bool) (mbt_files : mbt_input list) : string = diff --git a/src/hash.c b/src/hash.c index 0db0025..08614ef 100755 --- a/src/hash.c +++ b/src/hash.c @@ -21,9 +21,16 @@ SOFTWARE. */ -// copied from [https://github.com/janestreet/base/blob/master/hash_types/src/internalhash_stubs.c] +/* + * Initially copied from [https://github.com/janestreet/base/blob/master/hash_types/src/internalhash_stubs.c] + */ #include + +/* + * These headers are typically under /usr/lib/ocaml, if you install ocaml in the default way. + * Don't worry, Dune will sort them right. + */ #include #include diff --git a/src/mi_format.ml b/src/mi_format.ml index a8e616e..f9b2107 100644 --- a/src/mi_format.ml +++ b/src/mi_format.ml @@ -235,8 +235,7 @@ let serialized_of_pkg_info (mi_view : Pkg_info.mi_view) : Serialize.serialized = name = mi_view.name; } -let pkg_info_of_serialized (pkg_info : Serialize.serialized) : Pkg_info.mi_view - = +let pkg_info_of_serialized (pkg_info: Serialize.serialized): Pkg_info.mi_view = let external_constrs = Hash_string.create 17 in Arr.iter pkg_info.export_types (fun (_, ty_decl) -> match ty_decl.ty_desc with diff --git a/src/moon0_main.ml b/src/moon0_main.ml index 9e14b76..4ceaa05 100644 --- a/src/moon0_main.ml +++ b/src/moon0_main.ml @@ -18,9 +18,7 @@ module Config = Basic_config module Parse = Parsing_parse module Lst = Basic_lst -let ( |-> ) obj callback = - callback obj; - obj +let ( |-> ) obj callback = callback obj; obj let rec make_directory dir = if Sys.file_exists dir then () @@ -48,9 +46,15 @@ let check_usage = "moonc check [options] " let compile_usage = "moonc compile [options] " let gen_test_info_usage = "moonc gen-test-info [options] " let postprocess_ast = Driver_util.postprocess_ast ~diagnostics + +(** +Converts a string into S-expression and write it in a file. + +See s.ml for more about S-expressions. +*) let write_s name sexp = if not !no_intermediate_file then Io.write_s name sexp -(* TAST here stands for typed AST *) +(** TAST stands for typed AST. *) let tast_of_ast ~name ~build_context (asts : Parse.output list) : Typedtree.output * Global_env.t = let write_opt suffix sexp = write_s (name ^ suffix) sexp in @@ -99,13 +103,7 @@ let wasm_gen ~(name : string) c = | `Clam_Unused_Let -> write_opt ".unused_let.clam" (Clam.sexp_of_prog clam) | `Clam_End -> write_s (name ^ ".o.clam") (Clam.sexp_of_prog clam) in - - let target = - match !Config.target with - | Wasm_gc -> - Driver_util.Wasm_gc { clam_callback; sexp_callback = (fun _ -> ()) } - in - Driver_util.wasm_gen ~elim_unused_let:!elim_unused_let c ~target + Driver_util.wasm_gen ~elim_unused_let:!elim_unused_let c ~clam_callback let source_loader ~pkg file = let path = Pkg_path_tbl.resolve_source Loc.pkg_path_tbl ~pkg ~file in @@ -140,11 +138,14 @@ let wat_gen (sexp : W.t list) = ~ignores: [ "source_name"; "source_pos"; "source_type"; "prologue_end" ] s); - Lst.iter ss (fun s -> + List.iter (fun s -> Buffer.add_string buf "\n"; - Buffer.add_string buf (W.to_string s))); + Buffer.add_string buf (W.to_string s)) ss); Buffer.contents buf +let riscv_gen (sexp : Riscv.t list) = + List.map Riscv.to_asm_string sexp |> String.concat "\n" + let bundle_core () = let output_file = ref "" in let inputs = ref [] in @@ -164,39 +165,55 @@ let link_core () = let input_files : Driver_util.core_input Basic_vec.t = Basic_vec.empty () in let exported_functions = Driver_config.Linkcore_Opt.exported_functions in let link_core_spec = Driver_config.Linkcore_Opt.spec in + + (* Parse argument and check input validity *) Arg.parse_argv ~current:(ref 1) Sys.argv link_core_spec (fun input_file -> Basic_vec.push input_files (Driver_util.Core_Path input_file)) link_core_usage; + if Basic_vec.is_empty input_files && !output_file = "" then ( Arg.usage link_core_spec link_core_usage; exit 0); + + if !output_file = "" then ( + raise (Arg.Bad ("output file unspecified")) + ); + + (* Load config file *) Basic_config.current_package := !link_main; - (if Sys.file_exists !pkg_config_path then - let json_data = - Json_parse.parse_json_from_file ~diagnostics:(Diagnostics.make ()) - ~fname:(Filename.basename !pkg_config_path) - !pkg_config_path - in - Pkg_config_util.link_core_load_pkg_config json_data); + if Sys.file_exists !pkg_config_path then + (let json_data = + Json_parse.parse_json_from_file ~diagnostics:(Diagnostics.make ()) + ~fname:(Filename.basename !pkg_config_path) + !pkg_config_path + in Pkg_config_util.link_core_load_pkg_config json_data); + + + (* Generates target code *) let on_source_map blob = Io.write (!output_file ^ ".map") blob in - let sexp_callback sexp = - match !output_file with - | "" -> - prerr_string "unspecified output file"; - exit 2 - | filename -> - if Filename.check_suffix filename ".wat" then - Io.write filename (wat_gen sexp) - else if Filename.check_suffix filename ".wasm" then - Io.write filename - (wasm_bin_gen ~file:filename ~on_source_map (Driver_util.Wat sexp)) - else prerr_string (filename ^ ": unrecognized file type") + let wasm_gen_target sexp = + let filename = !output_file in + if Filename.check_suffix filename ".wat" then + Io.write filename (wat_gen sexp) + else if Filename.check_suffix filename ".wasm" then + Io.write filename + (wasm_bin_gen ~file:filename ~on_source_map (Driver_util.Wat sexp)) + else raise (Arg.Bad ("unrecognized output file type: " ^ filename ^ "; must be one of .wat or .wasm")) in + + let riscv_gen_target sexp = + (* No need to check file type as in wasm. *) + (* We will write RISC-V assembly anyway. *) + Io.write !output_file (riscv_gen sexp) + in + let target = match !Config.target with | Wasm_gc -> - Driver_util.Wasm_gc { clam_callback = (fun _ _ -> ()); sexp_callback } + Driver_util.Wasm_gc { clam_callback = (fun _ _ -> ()); sexp_callback = wasm_gen_target } + | Riscv -> + Driver_util.Riscv { sexp_callback = riscv_gen_target } in Driver_util.link_core ~shrink_wasm:!shrink_wasm ~elim_unused_let:!elim_unused_let ~core_inputs:input_files @@ -223,9 +240,9 @@ let build_package () = let pkg_name = !Basic_config.current_package in make_directory output_dir; let std_import = Driver_util.Std_Path !Basic_config.std_path in - let imports = Lst.map !mi_files (fun imp -> Driver_util.Import_Path imp) in + let imports = List.map (fun imp -> Driver_util.Import_Path imp) !mi_files in let mbt_files = - Lst.map !input_files (fun path -> Driver_util.File_Path path) + List.map (fun path -> Driver_util.File_Path path) !input_files in let profile_callback asts = asts in let debug_source_callback _ _ = () @@ -292,10 +309,8 @@ let check () = let output_dir = Filename.dirname !output_file in if not !no_mi then make_directory output_dir; let std_import = Driver_util.Std_Path !Basic_config.std_path in - let imports = Lst.map !mi_files (fun imp -> Driver_util.Import_Path imp) in - let mbt_files = - Lst.map !input_files (fun path -> Driver_util.File_Path path) - in + let imports = List.map (fun imp -> Driver_util.Import_Path imp) !mi_files in + let mbt_files = List.map (fun path -> Driver_util.File_Path path) !input_files in let genv_callback genv = if not !no_mi then Global_env.export_mi ~action:(Write_file !output_file) @@ -424,6 +439,7 @@ let compile () = | Wasm_gc -> let mod_ = wasm_gen ~name mono_core in postprecess mod_ + | Riscv -> failwith "TODO" (* TODO *) with Exit -> () in Arg.parse_argv ~current:(ref 1) Sys.argv spec diff --git a/src/name_mangle.ml b/src/name_mangle.ml index a73910c..ef0a403 100644 --- a/src/name_mangle.ml +++ b/src/name_mangle.ml @@ -59,9 +59,8 @@ let make_type_name (t : Mtype.t) : Tid.t = buf +>> "UnsafeMaybeUninit<"; go t; buf +>> ">" - | T_error_value_result { id } -> ( - match !Basic_config.target with - | Wasm_gc -> buf +>> Mtype.id_to_string id) + | T_error_value_result { id } -> + buf +>> Mtype.id_to_string id and gos (sep : char) = function | [] -> () | ty :: [] -> go ty diff --git a/src/parsing_parse.ml b/src/parsing_parse.ml index ab2380e..02d75cf 100644 --- a/src/parsing_parse.ml +++ b/src/parsing_parse.ml @@ -127,5 +127,5 @@ let parse ~diagnostics ?(debug_tokens = false) ?directive_handler ~transform In_channel.with_open_bin path In_channel.input_all |> impl_of_string ~diagnostics ~debug_tokens ~transform ?directive_handler ~name:(Filename.basename path) - with Sys_error msg -> + with Sys_error _ -> raise (Arg.Bad ("cannot open file: " ^ path)) diff --git a/src/riscv.ml b/src/riscv.ml new file mode 100644 index 0000000..cbd56fd --- /dev/null +++ b/src/riscv.ml @@ -0,0 +1,40 @@ +(* RISC-V assembly commands. *) + +module Reg = Riscv_reg + +type label = string + +type t = +| Add of Reg.t * Reg.t * Reg.t +| Sub of Reg.t * Reg.t * Reg.t +| Mul of Reg.t * Reg.t * Reg.t +| Div of Reg.t * Reg.t * Reg.t +| Call of label +| Label of label + +let to_string asm = + let convert_3reg ty rd rs1 rs2 = + let rd_str = Reg.to_string rd in + let rs1_str = Reg.to_string rs1 in + let rs2_str = Reg.to_string rs2 in + Printf.sprintf "%s %s, %s, %s" ty rd_str rs1_str rs2_str + in + match asm with + | Add (rd, rs1, rs2) -> convert_3reg "add" rd rs1 rs2 + | Sub (rd, rs1, rs2) -> convert_3reg "sub" rd rs1 rs2 + | Mul (rd, rs1, rs2) -> convert_3reg "mul" rd rs1 rs2 + | Div (rd, rs1, rs2) -> convert_3reg "div" rd rs1 rs2 + | Call label -> Printf.sprintf "call %s" label + | Label label -> Printf.sprintf "%s:" label + +(** +Used when emitting assembly. + +We expect every non-label command to be indented by 4 spaces. +*) +let to_asm_string asm = + match asm with + | Label _ -> to_string asm + | _ -> " " ^ to_string asm + +let regalloc ssa = [] \ No newline at end of file diff --git a/src/riscv_reg.ml b/src/riscv_reg.ml new file mode 100644 index 0000000..7627734 --- /dev/null +++ b/src/riscv_reg.ml @@ -0,0 +1,138 @@ +(* This type consists of all 32 integer registers and 32 FP registers of RISC-V. *) +(* It also contains 2 special marks about spilt registers. *) +type t = +| Zero +| Ra +| Sp +| Gp +| Tp +| T0 +| T1 +| T2 +| Fp +| S1 +| A0 +| A1 +| A2 +| A3 +| A4 +| A5 +| A6 +| A7 +| S2 +| S3 +| S4 +| S5 +| S6 +| S7 +| S8 +| S9 +| S10 +| S11 +| T3 +| T4 +| T5 +| T6 +| Ft0 +| Ft1 +| Ft2 +| Ft3 +| Ft4 +| Ft5 +| Ft6 +| Ft7 +| Fs0 +| Fs1 +| Fa0 +| Fa1 +| Fa2 +| Fa3 +| Fa4 +| Fa5 +| Fa6 +| Fa7 +| Fs2 +| Fs3 +| Fs4 +| Fs5 +| Fs6 +| Fs7 +| Fs8 +| Fs9 +| Fs10 +| Fs11 +| Ft8 +| Ft9 +| Ft10 +| Ft11 +| Spilt of int (* This integer is offset from stack pointer. *) +| SpiltFP of int + +let to_string (t: t) = + match t with + | Zero -> "zero" + | Ra -> "ra" + | Sp -> "sp" + | Gp -> "gp" + | Tp -> "tp" + | T0 -> "t0" + | T1 -> "t1" + | T2 -> "t2" + | Fp -> "fp" + | S1 -> "s1" + | A0 -> "a0" + | A1 -> "a1" + | A2 -> "a2" + | A3 -> "a3" + | A4 -> "a4" + | A5 -> "a5" + | A6 -> "a6" + | A7 -> "a7" + | S2 -> "s2" + | S3 -> "s3" + | S4 -> "s4" + | S5 -> "s5" + | S6 -> "s6" + | S7 -> "s7" + | S8 -> "s8" + | S9 -> "s9" + | S10 -> "s10" + | S11 -> "s11" + | T3 -> "t3" + | T4 -> "t4" + | T5 -> "t5" + | T6 -> "t6" + | Ft0 -> "ft0" + | Ft1 -> "ft1" + | Ft2 -> "ft2" + | Ft3 -> "ft3" + | Ft4 -> "ft4" + | Ft5 -> "ft5" + | Ft6 -> "ft6" + | Ft7 -> "ft7" + | Fs0 -> "fs0" + | Fs1 -> "fs1" + | Fa0 -> "fa0" + | Fa1 -> "fa1" + | Fa2 -> "fa2" + | Fa3 -> "fa3" + | Fa4 -> "fa4" + | Fa5 -> "fa5" + | Fa6 -> "fa6" + | Fa7 -> "fa7" + | Fs2 -> "fs2" + | Fs3 -> "fs3" + | Fs4 -> "fs4" + | Fs5 -> "fs5" + | Fs6 -> "fs6" + | Fs7 -> "fs7" + | Fs8 -> "fs8" + | Fs9 -> "fs9" + | Fs10 -> "fs10" + | Fs11 -> "fs11" + | Ft8 -> "ft8" + | Ft9 -> "ft9" + | Ft10 -> "ft10" + | Ft11 -> "ft11" + | Spilt x -> "spilt." ^ Int.to_string x + | SpiltFP x -> "spilt.d." ^ Int.to_string x \ No newline at end of file diff --git a/src/riscv_ssa.ml b/src/riscv_ssa.ml new file mode 100644 index 0000000..1147384 --- /dev/null +++ b/src/riscv_ssa.ml @@ -0,0 +1,41 @@ +(* Convert common IR into form of static single assignment (SSA). *) + +type var = string +type func = string +type label = string + +type t = +| Add of var * var * var +| Sub of var * var * var +| Mul of var * var * var +| Div of var * var * var +| Call of var * func * var list + +let to_string t = + let convert_3reg ty rd rs1 rs2 = + Printf.sprintf "%s = %s %s, %s" rd ty rs1 rs2 + in + match t with + | Add (rd, rs1, rs2) -> convert_3reg "add" rd rs1 rs2 + | Sub (rd, rs1, rs2) -> convert_3reg "sub" rd rs1 rs2 + | Mul (rd, rs1, rs2) -> convert_3reg "mul" rd rs1 rs2 + | Div (rd, rs1, rs2) -> convert_3reg "div" rd rs1 rs2 + | Call (rd, fn, args) -> Printf.sprintf "%s = call %s (%s)" rd fn (String.concat ", " args) + +(* Current counter of temporaries *) +let slot = ref 0 + +(* Construct a new temporary name *) +let new_temp () = + let name = "%" ^ Int.to_string !slot in + slot := !slot + 1; + name + +let convert_toplevel (top: Mcore.top_item) = + match top with + | Ctop_expr _ -> () + | _ -> () + +let ssa_of_mcore (core: Mcore.t) = + List.iter convert_toplevel core.body + diff --git a/src/s.ml b/src/s.ml index 0b0b7d2..4bba20a 100644 --- a/src/s.ml +++ b/src/s.ml @@ -13,15 +13,20 @@ *) +(** +S.ml is related to S-expression. See https://en.wikipedia.org/wiki/S-expression for more detail. + +For an intuitive explanation, S-expression is a Lisp-like language. + +It consists of either an "Atom" like `x`, `y` or `z`, or a "List" like (x y) or (x y z). We can always limit Lists to be of length 2, as (x y z) can be easily parsed as ((x y) z), but this approach is not taken here. +*) type t = Atom of string | List of t list let rec equal (x : t) (y : t) = - match x with - | Atom x -> ( match y with Atom y -> x = y | _ -> false) - | List ls -> ( - match y with - | List ly -> ( try List.for_all2 equal ls ly with _ -> false) - | _ -> false) + match x, y with + | Atom x, Atom y -> x = y + | List ls, List ly -> (try List.for_all2 equal ls ly with _ -> false) + | _ -> false let sexp_of_t (x : t) : t = x [@@dead "+sexp_of_t"] diff --git a/src/trait_impl.ml b/src/trait_impl.ml index 7a78c6c..129af54 100644 --- a/src/trait_impl.ml +++ b/src/trait_impl.ml @@ -67,8 +67,6 @@ module H = Basic_hashf.Make (struct type t = Type_path.t * Type_path.t include struct - let _ = fun (_ : t) -> () - let sexp_of_t = (fun (arg0__012_, arg1__013_) -> let res0__014_ = Type_path.sexp_of_t arg0__012_ @@ -76,8 +74,6 @@ module H = Basic_hashf.Make (struct S.List [ res0__014_; res1__015_ ] : t -> S.t) - let _ = sexp_of_t - let equal = (fun a__016_ b__017_ -> let t__018_, t__019_ = a__016_ in @@ -87,26 +83,16 @@ module H = Basic_hashf.Make (struct (Type_path.equal t__019_ t__021_) : t -> t -> bool) - let _ = equal - - let (hash_fold_t : Ppx_base.state -> t -> Ppx_base.state) = - fun hsv arg -> - let e0, e1 = arg in + let hash_fold_t hsv (e0, e1) = let hsv = Type_path.hash_fold_t hsv e0 in let hsv = Type_path.hash_fold_t hsv e1 in hsv - let _ = hash_fold_t - - let (hash : t -> Ppx_base.hash_value) = - let func arg = - Ppx_base.get_hash_value - (let hsv = Ppx_base.create () in - hash_fold_t hsv arg) - in - fun x -> func x + let hash arg = + Ppx_base.get_hash_value + (let hsv = Ppx_base.create () in + hash_fold_t hsv arg) - let _ = hash end end) diff --git a/src/w.ml b/src/w.ml index 7754de5..9aef9e2 100644 --- a/src/w.ml +++ b/src/w.ml @@ -12,16 +12,19 @@ . *) +(** +This is representation of WASM. +It is very similar to S-expressions (in s.ml), but there are subtle differences. +Try `diff w.ml s.ml` to highlight them. +*) type t = Atom of string | List of t list let rec equal (x : t) (y : t) = - match x with - | Atom x -> ( match y with Atom y -> x = y | _ -> false) - | List ls -> ( - match y with - | List ly -> ( try List.for_all2 equal ls ly with _ -> false) - | _ -> false) + match x, y with + | Atom x, Atom y -> x = y + | List ls, List ly -> (try List.for_all2 equal ls ly with _ -> false) + | _ -> false let rec sexp_of_t = function | Atom s -> S.Atom s From 3763d5abc2184450f365fc127fd7e78e4c1035b9 Mon Sep 17 00:00:00 2001 From: AdUhTkJm <2292398666@qq.com> Date: Fri, 20 Dec 2024 15:19:46 +0800 Subject: [PATCH 2/7] Basic data structures for risc-v backend --- src/basic_core_ident.ml | 124 ++++++++++++++------------------- src/basic_qual_ident.ml | 5 +- src/mtype.ml | 149 +++++++++++++--------------------------- src/riscv_ssa.ml | 112 +++++++++++++++++++++++------- 4 files changed, 191 insertions(+), 199 deletions(-) diff --git a/src/basic_core_ident.ml b/src/basic_core_ident.ml index 1ff8916..b842c4f 100644 --- a/src/basic_core_ident.ml +++ b/src/basic_core_ident.ml @@ -28,34 +28,22 @@ module Key = struct include struct let _ = fun (_ : t) -> () - let (hash_fold_t : Ppx_base.state -> t -> Ppx_base.state) = - (fun hsv arg -> - match arg with - | Pdot _a0 -> - let hsv = Ppx_base.hash_fold_int hsv 0 in - let hsv = hsv in - Qual_ident.hash_fold_t hsv _a0 - | Plocal_method _a0 -> - let hsv = Ppx_base.hash_fold_int hsv 1 in - let hsv = hsv in - Ident.hash_fold_local_method hsv _a0 - | Pident _ir -> - let hsv = Ppx_base.hash_fold_int hsv 2 in - let hsv = - let hsv = hsv in - Ppx_base.hash_fold_int hsv _ir.stamp - in - hsv - | Pmutable_ident _ir -> - let hsv = Ppx_base.hash_fold_int hsv 3 in - let hsv = - let hsv = hsv in - Ppx_base.hash_fold_int hsv _ir.stamp - in - hsv - : Ppx_base.state -> t -> Ppx_base.state) - - let _ = hash_fold_t + let hash_fold_t hsv arg = + match arg with + | Pdot _a0 -> + let hsv = Ppx_base.hash_fold_int hsv 0 in + Qual_ident.hash_fold_t hsv _a0 + | Plocal_method _a0 -> + let hsv = Ppx_base.hash_fold_int hsv 1 in + Ident.hash_fold_local_method hsv _a0 + | Pident _ir -> + let hsv = Ppx_base.hash_fold_int hsv 2 in + let hsv = Ppx_base.hash_fold_int hsv _ir.stamp in + hsv + | Pmutable_ident _ir -> + let hsv = Ppx_base.hash_fold_int hsv 3 in + let hsv = Ppx_base.hash_fold_int hsv _ir.stamp in + hsv let (hash : t -> Ppx_base.hash_value) = let func arg = @@ -65,52 +53,40 @@ module Key = struct in fun x -> func x - let _ = hash - - let equal = - (fun a__001_ b__002_ -> - if Stdlib.( == ) a__001_ b__002_ then true - else - match (a__001_, b__002_) with - | Pdot _a__003_, Pdot _b__004_ -> Qual_ident.equal _a__003_ _b__004_ - | Pdot _, _ -> false - | _, Pdot _ -> false - | Plocal_method _a__005_, Plocal_method _b__006_ -> - Ident.equal_local_method _a__005_ _b__006_ - | Plocal_method _, _ -> false - | _, Plocal_method _ -> false - | Pident _a__007_, Pident _b__008_ -> - Stdlib.( = ) (_a__007_.stamp : int) _b__008_.stamp - | Pident _, _ -> false - | _, Pident _ -> false - | Pmutable_ident _a__009_, Pmutable_ident _b__010_ -> - Stdlib.( = ) (_a__009_.stamp : int) _b__010_.stamp - : t -> t -> bool) - - let _ = equal - - let compare = - (fun a__011_ b__012_ -> - if Stdlib.( == ) a__011_ b__012_ then 0 - else - match (a__011_, b__012_) with - | Pdot _a__013_, Pdot _b__014_ -> - Qual_ident.compare _a__013_ _b__014_ - | Pdot _, _ -> -1 - | _, Pdot _ -> 1 - | Plocal_method _a__015_, Plocal_method _b__016_ -> - Ident.compare_local_method _a__015_ _b__016_ - | Plocal_method _, _ -> -1 - | _, Plocal_method _ -> 1 - | Pident _a__017_, Pident _b__018_ -> - Stdlib.compare (_a__017_.stamp : int) _b__018_.stamp - | Pident _, _ -> -1 - | _, Pident _ -> 1 - | Pmutable_ident _a__019_, Pmutable_ident _b__020_ -> - Stdlib.compare (_a__019_.stamp : int) _b__020_.stamp - : t -> t -> int) - - let _ = compare + let equal a b = + if a == b then true + else match (a, b) with + | Pdot x, Pdot y -> Qual_ident.equal x y + | Pdot _, _ -> false + | _, Pdot _ -> false + | Plocal_method x, Plocal_method y -> + Ident.equal_local_method x y + | Plocal_method _, _ -> false + | _, Plocal_method _ -> false + | Pident x, Pident y -> + x.stamp = y.stamp + | Pident _, _ -> false + | _, Pident _ -> false + | Pmutable_ident x, Pmutable_ident y -> + x.stamp = y.stamp + + let compare a b = + if a == b then 0 + else match (a, b) with + | Pdot x, Pdot y -> + Qual_ident.compare x y + | Pdot _, _ -> -1 + | _, Pdot _ -> 1 + | Plocal_method x, Plocal_method y -> + Ident.compare_local_method x y + | Plocal_method _, _ -> -1 + | _, Plocal_method _ -> 1 + | Pident x, Pident y -> + Stdlib.compare x.stamp y.stamp + | Pident _, _ -> -1 + | _, Pident _ -> 1 + | Pmutable_ident x, Pmutable_ident y -> + Stdlib.compare x.stamp y.stamp end let to_string (x : t) = diff --git a/src/basic_qual_ident.ml b/src/basic_qual_ident.ml index e4511c0..3b2a7cd 100644 --- a/src/basic_qual_ident.ml +++ b/src/basic_qual_ident.ml @@ -12,11 +12,14 @@ . *) - module Type_path = Basic_type_path module Config = Basic_config module Strutil = Basic_strutil +(** +Qual ident stands for qualified identifier. +In other words, an identifier like `@immut/hashmap.T`. +*) type t = | Qregular of { pkg : string; name : string } | Qregular_implicit_pkg of { pkg : string; name : string } diff --git a/src/mtype.ml b/src/mtype.ml index 672068c..51d8068 100644 --- a/src/mtype.ml +++ b/src/mtype.ml @@ -55,92 +55,54 @@ type t = [@@warning "+4"] include struct - let _ = fun (_ : t) -> () - - let rec sexp_of_t = - (function - | T_int -> S.Atom "T_int" - | T_char -> S.Atom "T_char" - | T_bool -> S.Atom "T_bool" - | T_unit -> S.Atom "T_unit" - | T_byte -> S.Atom "T_byte" - | T_int64 -> S.Atom "T_int64" - | T_uint -> S.Atom "T_uint" - | T_uint64 -> S.Atom "T_uint64" - | T_float -> S.Atom "T_float" - | T_double -> S.Atom "T_double" - | T_string -> S.Atom "T_string" - | T_bytes -> S.Atom "T_bytes" - | T_optimized_option { elem = elem__002_ } -> - let bnds__001_ = ([] : _ Stdlib.List.t) in - let bnds__001_ = - let arg__003_ = sexp_of_t elem__002_ in - (S.List [ S.Atom "elem"; arg__003_ ] :: bnds__001_ : _ Stdlib.List.t) - in - S.List (S.Atom "T_optimized_option" :: bnds__001_) - | T_func { params = params__005_; return = return__007_ } -> - let bnds__004_ = ([] : _ Stdlib.List.t) in - let bnds__004_ = - let arg__008_ = sexp_of_t return__007_ in - (S.List [ S.Atom "return"; arg__008_ ] :: bnds__004_ - : _ Stdlib.List.t) - in - let bnds__004_ = - let arg__006_ = Moon_sexp_conv.sexp_of_list sexp_of_t params__005_ in - (S.List [ S.Atom "params"; arg__006_ ] :: bnds__004_ - : _ Stdlib.List.t) - in - S.List (S.Atom "T_func" :: bnds__004_) - | T_tuple { tys = tys__010_ } -> - let bnds__009_ = ([] : _ Stdlib.List.t) in - let bnds__009_ = - let arg__011_ = Moon_sexp_conv.sexp_of_list sexp_of_t tys__010_ in - (S.List [ S.Atom "tys"; arg__011_ ] :: bnds__009_ : _ Stdlib.List.t) - in - S.List (S.Atom "T_tuple" :: bnds__009_) - | T_fixedarray { elem = elem__013_ } -> - let bnds__012_ = ([] : _ Stdlib.List.t) in - let bnds__012_ = - let arg__014_ = sexp_of_t elem__013_ in - (S.List [ S.Atom "elem"; arg__014_ ] :: bnds__012_ : _ Stdlib.List.t) - in - S.List (S.Atom "T_fixedarray" :: bnds__012_) - | T_constr arg0__015_ -> - let res0__016_ = sexp_of_id arg0__015_ in - S.List [ S.Atom "T_constr"; res0__016_ ] - | T_trait arg0__017_ -> - let res0__018_ = sexp_of_id arg0__017_ in - S.List [ S.Atom "T_trait"; res0__018_ ] - | T_any { name = name__020_ } -> - let bnds__019_ = ([] : _ Stdlib.List.t) in - let bnds__019_ = - let arg__021_ = sexp_of_id name__020_ in - (S.List [ S.Atom "name"; arg__021_ ] :: bnds__019_ : _ Stdlib.List.t) - in - S.List (S.Atom "T_any" :: bnds__019_) - | T_maybe_uninit arg0__022_ -> - let res0__023_ = sexp_of_t arg0__022_ in - S.List [ S.Atom "T_maybe_uninit"; res0__023_ ] - | T_error_value_result { ok = ok__025_; err = err__027_; id = id__029_ } -> - let bnds__024_ = ([] : _ Stdlib.List.t) in - let bnds__024_ = - let arg__030_ = sexp_of_id id__029_ in - (S.List [ S.Atom "id"; arg__030_ ] :: bnds__024_ : _ Stdlib.List.t) - in - let bnds__024_ = - let arg__028_ = sexp_of_t err__027_ in - (S.List [ S.Atom "err"; arg__028_ ] :: bnds__024_ : _ Stdlib.List.t) - in - let bnds__024_ = - let arg__026_ = sexp_of_t ok__025_ in - (S.List [ S.Atom "ok"; arg__026_ ] :: bnds__024_ : _ Stdlib.List.t) - in - S.List (S.Atom "T_error_value_result" :: bnds__024_) - : t -> S.t) - - let _ = sexp_of_t + let rec sexp_of_t t = match t with + | T_int -> S.Atom "int" + | T_char -> S.Atom "char" + | T_bool -> S.Atom "bool" + | T_unit -> S.Atom "unit" + | T_byte -> S.Atom "byte" + | T_int64 -> S.Atom "int64" + | T_uint -> S.Atom "uint" + | T_uint64 -> S.Atom "uint64" + | T_float -> S.Atom "float" + | T_double -> S.Atom "double" + | T_string -> S.Atom "string" + | T_bytes -> S.Atom "bytes" + | T_optimized_option { elem } -> + let x = [S.List [ S.Atom "elem"; sexp_of_t elem ]] in + S.List (S.Atom "optimized_option" :: x) + | T_func { params; return = ret } -> + let x = [S.List [ S.Atom "return"; sexp_of_t ret ]] in + let y = + S.List [ S.Atom "params"; Moon_sexp_conv.sexp_of_list sexp_of_t params ] :: x + in + S.List (S.Atom "func" :: y) + | T_tuple { tys } -> + let x = + [S.List [ S.Atom "tys"; Moon_sexp_conv.sexp_of_list sexp_of_t tys ]] + in + S.List (S.Atom "tuple" :: x) + | T_fixedarray { elem } -> + let x = [S.List [ S.Atom "elem"; sexp_of_t elem ]] in + S.List (S.Atom "fixedarray" :: x) + | T_constr x -> + S.List [ S.Atom "constr"; sexp_of_id x ] + | T_trait x -> + S.List [ S.Atom "trait"; sexp_of_id x ] + | T_any { name } -> + let x = [S.List [ S.Atom "name"; sexp_of_id name ]] in + S.List (S.Atom "any" :: x) + | T_maybe_uninit x -> + S.List [ S.Atom "maybe_uninit"; sexp_of_t x ] + | T_error_value_result { ok; err; id } -> + let x = [S.List [ S.Atom "id"; sexp_of_id id ]] in + let y = S.List [ S.Atom "err"; sexp_of_t err ] :: x in + let z = S.List [ S.Atom "ok"; sexp_of_t ok ] :: y in + S.List (S.Atom "error_value_result" :: z) end +let to_string (t: t) = sexp_of_t t |> S.to_string + let is_numeric (t : t) = match t with | T_unit | T_int | T_uint | T_char | T_bool | T_byte | T_int64 | T_uint64 @@ -176,19 +138,6 @@ include struct let _ = sexp_of_field_name end -let field_index0 = Indexed 0 -let field_index1 = Indexed 1 -let field_index2 = Indexed 2 -let field_index3 = Indexed 3 - -let field_indexed i = - match i with - | 0 -> field_index0 - | 1 -> field_index1 - | 2 -> field_index2 - | 3 -> field_index3 - | n -> Indexed n - type field_info = { field_type : t; name : field_name; mut : bool } include struct @@ -615,7 +564,7 @@ let from_stype (stype : Stype.t) ~(stype_defs : Typing_info.stype_defs) let payload = Lst.mapi c.cs_args (fun i ty -> let field_type = go ty in - { field_type; name = field_indexed i; mut = false }) + { field_type; name = Indexed i; mut = false }) in { payload; tag } in @@ -640,7 +589,7 @@ let from_stype (stype : Stype.t) ~(stype_defs : Typing_info.stype_defs) | Labelled { label; is_mut = mut; _ } -> { field_type; name = Named label; mut } | Positional index -> - { field_type; name = field_indexed index; mut = false } + { field_type; name = Indexed index; mut = false } | Optional _ | Autofill _ | Question_optional _ -> assert false) in @@ -679,7 +628,7 @@ let from_stype (stype : Stype.t) ~(stype_defs : Typing_info.stype_defs) | Labelled { label; is_mut = mut; _ } -> { field_type; name = Named label; mut } | Positional index -> - { field_type; name = field_indexed index; mut = false } + { field_type; name = Indexed index; mut = false } | Optional _ | Autofill _ | Question_optional _ -> assert false) in diff --git a/src/riscv_ssa.ml b/src/riscv_ssa.ml index 1147384..aae32f8 100644 --- a/src/riscv_ssa.ml +++ b/src/riscv_ssa.ml @@ -1,31 +1,80 @@ -(* Convert common IR into form of static single assignment (SSA). *) +(** Convert common IR into form of static single assignment (SSA). *) -type var = string -type func = string -type label = string +type var = { + name: string; + typ: Mtype.t; +} -type t = -| Add of var * var * var -| Sub of var * var * var -| Mul of var * var * var -| Div of var * var * var -| Call of var * func * var list +let to_string (r: var) = + Printf.sprintf "%s: %s" r.name (Mtype.to_string r.typ) + + +(** +Similar to R-type instructions in RISC-V, hence the name. +It consists of `rd` (destination) and two sources `rs1` and `rs2`. +*) +type r_type = { + rd: var; + rs1: var; + rs2: var; +} + +(** +Calls function named `fn` with args `args`, +and store the result in `rd`. +*) +type call_data = { + rd: var; + fn: string; + args: var list; +} + +type fn = { + fn: string; + args: var list; + body: t list; + is_export: bool; +} + +(** Instructions available in SSA. *) +and t = +| Add of r_type +| Sub of r_type +| Mul of r_type +| Div of r_type +| Call of call_data +| FnDecl of fn let to_string t = - let convert_3reg ty rd rs1 rs2 = - Printf.sprintf "%s = %s %s, %s" rd ty rs1 rs2 + let convert_rtype ty ({rd; rs1; rs2}: r_type) = + let rd_str = to_string rd in + let rs1_str = to_string rs1 in + let rs2_str = to_string rs2 in + Printf.sprintf "%s = %s %s, %s" rd_str ty rs1_str rs2_str in - match t with - | Add (rd, rs1, rs2) -> convert_3reg "add" rd rs1 rs2 - | Sub (rd, rs1, rs2) -> convert_3reg "sub" rd rs1 rs2 - | Mul (rd, rs1, rs2) -> convert_3reg "mul" rd rs1 rs2 - | Div (rd, rs1, rs2) -> convert_3reg "div" rd rs1 rs2 - | Call (rd, fn, args) -> Printf.sprintf "%s = call %s (%s)" rd fn (String.concat ", " args) - -(* Current counter of temporaries *) + + (** Deal with indentation inside functions. *) + let rec to_str_with_depth t depth = + String.make (depth * 2) ' ' ^ + match t with + | Add r -> convert_rtype "add" r + | Sub r -> convert_rtype "sub" r + | Mul r -> convert_rtype "mul" r + | Div r -> convert_rtype "div" r + | Call { rd; fn; args } -> + let args_list = String.concat ", " (List.map to_string args) in + Printf.sprintf "%s = call %s (%s)" (to_string rd) fn args_list + | FnDecl { fn; args; body; is_export } -> + let args_str = String.concat ", " (List.map to_string args) in + let body_str = String.concat "\n" (List.map (fun t -> to_str_with_depth t (depth + 1)) body) in + let fn_prefix = if is_export then "export fn" else "fn" in + Printf.sprintf "%s %s (%s) {\n%s\n}" fn_prefix fn args_str body_str + in to_str_with_depth t 0 + +(** Current counter of temporaries. *) let slot = ref 0 -(* Construct a new temporary name *) +(** Construct a new temporary name. *) let new_temp () = let name = "%" ^ Int.to_string !slot in slot := !slot + 1; @@ -33,9 +82,24 @@ let new_temp () = let convert_toplevel (top: Mcore.top_item) = match top with - | Ctop_expr _ -> () - | _ -> () + | Ctop_expr _ -> [] + | Ctop_fn { binder; func; export_info_; loc_ } -> + let var_of_param ({ binder; ty; loc_ } : Mcore.param) = + { + name = Basic_core_ident.to_string binder; + typ = ty + } + in + let fn = Basic_core_ident.to_string binder in + let args = List.map var_of_param func.params in + [ + FnDecl { fn; args; body = []; is_export = false} + ] + | _ -> [] let ssa_of_mcore (core: Mcore.t) = - List.iter convert_toplevel core.body + Basic_io.write_s "core.ir" (Mcore.sexp_of_t core); + let body = List.map convert_toplevel core.body |> List.flatten in + Basic_io.write "core.ssa" (String.concat "\n" (List.map to_string body)); + body From af3eaa6edb15958388e7232b46575b575b86242c Mon Sep 17 00:00:00 2001 From: AdUhTkJm <2292398666@qq.com> Date: Fri, 20 Dec 2024 17:34:42 +0800 Subject: [PATCH 3/7] Produces SSA for basic operations --- src/primitive.ml | 8 +++ src/riscv_ssa.ml | 159 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 140 insertions(+), 27 deletions(-) diff --git a/src/primitive.ml b/src/primitive.ml index 634af78..399e8c9 100644 --- a/src/primitive.ml +++ b/src/primitive.ml @@ -636,6 +636,14 @@ let prim_table = let find_prim (name : string) : prim option = Hash_string.find_opt prim_table name +let find_name prim = + let table = Basic_hash_gen.to_list prim_table in + let filtered = List.filter (fun (str, p) -> p = prim) table in + match filtered with + | [] -> None + | [(str, p)] -> Some str + | _ -> failwith "primitive.ml: duplicate intrinsics" + let is_intrinsic (p : prim) = match p with Pintrinsic _ -> true | _ -> false let is_pure (prim : prim) = diff --git a/src/riscv_ssa.ml b/src/riscv_ssa.ml index ad4714f..9a058a2 100644 --- a/src/riscv_ssa.ml +++ b/src/riscv_ssa.ml @@ -31,6 +31,42 @@ type call_data = { args: var list; } +(** +Lengths of immediates. +Other lengths (like 16-bit short) are not supported currently. +*) +type imm_type = Bit32 | Bit64 + +(** +Assigns (un-)signed integer `imm` to `rd`. +*) +type assign_int = { + rd: var; + imm: int64; + size: imm_type; + signed: bool; +} + +(** +Assigns floating point number `imm` to `rd`. +*) +type assign_fp = { + rd: var; + imm: float; + size: imm_type; +} + +(** +Assigns string `imm` to `rd`. + +We don't care about how string should be represented; +that's the job for riscv.ml. +*) +type assign_str = { + rd: var; + imm: string; +} + type fn = { fn: string; args: var list; @@ -45,6 +81,9 @@ and t = | Mul of r_type | Div of r_type | Call of call_data +| AssignInt of assign_int +| AssignFP of assign_fp +| AssignStr of assign_str | FnDecl of fn | Nop @@ -64,15 +103,39 @@ let to_string t = | Sub r -> convert_rtype "sub" r | Mul r -> convert_rtype "mul" r | Div r -> convert_rtype "div" r + | Call { rd; fn; args } -> let args_list = String.concat ", " (List.map to_string args) in Printf.sprintf "%s = call %s (%s)" (to_string rd) fn args_list + + | AssignInt { rd; imm; size; signed } -> + (* We follow C convention of representing literals. *) + let suffix = (match (size, signed) with + | (Bit32, true) -> "" + | (Bit64, true) -> "ll" + | (Bit32, false) -> "u" + | (Bit64, false) -> "ull") + in + Printf.sprintf "%s = %s%s" (to_string rd) (Int64.to_string imm) suffix + + | AssignFP { rd; imm; size; } -> + let suffix = (match size with + | Bit32 -> "f" + | Bit64 -> "") + in + Printf.sprintf "%s = %s%s" (to_string rd) (Float.to_string imm) suffix + + | AssignStr { rd; imm; } -> + Printf.sprintf "%s = \"%s\"" (to_string rd) imm + | FnDecl { fn; args; body; return; } -> let args_str = String.concat ", " (List.map to_string args) in let body_str = String.concat "\n" (List.map (fun t -> to_str_with_depth t (depth + 1)) body) in - let return_str = String.make (depth * 2 + 2) ' ' ^ (to_string return) in + let return_str = String.make (depth * 2 + 2) ' ' ^ "return " ^ (to_string return) in Printf.sprintf "fn %s (%s) {\n%s\n%s\n}" fn args_str body_str return_str + | Nop -> "nop" + in to_str_with_depth t 0 (** Counter of temporaries. *) @@ -85,16 +148,6 @@ let new_temp ty = { name; ty } (** -We need to use a mutable structure to generate SSA. - -However, this structure cannot be carried as return value, -since we must return the register in which the result of an SSA instruction is stored. - -Therefore the assembly generated is placed in a global variable. -*) -let ssa = Basic_vec.make ~dummy:Nop 20 - -(* Currently I don't know what does `prim` ever mean in some places, so I ignore them in total. @@ -105,11 +158,11 @@ let warn prim = match prim with | Some _ -> prerr_endline "warning: prim is not null" (** -As noted above, this function stores the SSA generated in global variable `ssa`. +This function stores the SSA generated in the given argument `ssa`. -It returns the variable in which +It returns the variable in which the result of the last instruction pushed is stored. *) -let do_convert (expr: Mcore.expr) = +let rec do_convert ssa (expr: Mcore.expr) = match expr with | Cexpr_var { id; ty; prim; _ } -> warn prim; @@ -117,32 +170,73 @@ let do_convert (expr: Mcore.expr) = (* We treat primitives like special functions. - It's just that we prefix them with `__prim_`, - in order to distinguish from user-defined functions. + + Since all primitives start with "%", + there is no risk that one of them coincide with user-defined functions. *) | Cexpr_prim { prim; args; ty; _ } -> - let rd = new_temp ty in - rd + let rd = new_temp ty in + let name = Primitive.find_name prim in + let args = List.map (fun expr -> do_convert ssa expr) args in + let fn = match name with + | None -> failwith "riscv_ssa.ml: unrecognized intrinsics" + | Some x -> x + in + Basic_vec.push ssa (Call { rd; fn; args }); + rd | Cexpr_apply { func; args; ty; prim; _ } -> - warn prim; - let rd = new_temp ty in - rd + warn prim; + let rd = new_temp ty in + let fn = Ident.to_string func in + let args = List.map (fun expr -> do_convert ssa expr) args in + Basic_vec.push ssa (Call { rd; fn; args }); + rd - | _ -> failwith "TODO: cannot deal with this" + | Cexpr_const { c; ty; _ } -> + let rd = new_temp ty in + let instruction = (match c with + | C_string imm -> + AssignStr { rd; imm; } + | C_bool imm -> + AssignInt { rd; imm = Int64.of_int (if imm then 1 else 0); size = Bit32; signed = true; } + | C_int { v; _ } -> + AssignInt { rd; imm = Int64.of_int32 v; size = Bit32; signed = true; } + | C_int64 { v; _ } -> + AssignInt { rd; imm = v; size = Bit64; signed = true; } + | C_uint { v; _ } -> + AssignInt { rd; imm = Int64.of_int32 v; size = Bit32; signed = false; } + | C_uint64 { v; _ } -> + AssignInt { rd; imm = v; size = Bit64; signed = false; } + | C_float { v; _ } -> + AssignFP { rd; imm = v; size = Bit32; } + | C_double { v; _ } -> + AssignFP { rd; imm = v; size = Bit64; } + | _ -> failwith "TODO: riscv_ssa.ml: unsupported constant type" + ) in + Basic_vec.push ssa instruction; + rd + | _ -> failwith "TODO: riscv_ssa.ml: cannot deal with this expression" + +(** +Converts given `expr` into a list of SSA instructions, +along with the variable in which the result of this expression is stored. +*) let convert_expr (expr: Mcore.expr) = - let return = do_convert expr in + let ssa = Basic_vec.make ~dummy:Nop 20 in + let return = do_convert ssa expr in (Basic_vec.map_into_list ssa (fun x -> x), return) let convert_toplevel (top: Mcore.top_item) = match top with | Ctop_expr _ -> [] + | Ctop_fn { binder; func; export_info_; loc_ } -> let var_of_param ({ binder; ty; _ } : Mcore.param) = - { name = Basic_core_ident.to_string binder; ty } + { name = Ident.to_string binder; ty } in - let fn = Basic_core_ident.to_string binder in + let fn = Ident.to_string binder in let args = List.map var_of_param func.params in let (body, return) = convert_expr func.body in if export_info_ != None then @@ -150,11 +244,22 @@ let convert_toplevel (top: Mcore.top_item) = [ FnDecl { fn; args; body; return } ] + | _ -> [] let ssa_of_mcore (core: Mcore.t) = Basic_io.write_s "core.ir" (Mcore.sexp_of_t core); + (* Deal with other functions *) let body = List.map convert_toplevel core.body |> List.flatten in - Basic_io.write "core.ssa" (String.concat "\n" (List.map to_string body)); - body + (* Deal with main *) + let with_main = match core.main with + | Some (main_expr, _) -> + let (main_body, return) = convert_expr main_expr in + let main_decl = FnDecl { fn = "main"; args = []; body = main_body; return } in + main_decl :: body + + | None -> body + in + Basic_io.write "core.ssa" (String.concat "\n" (List.map to_string with_main)); + with_main From b409a1c2f8f8c89b61acb0691af820eaa4147523 Mon Sep 17 00:00:00 2001 From: AdUhTkJm <2292398666@qq.com> Date: Sat, 21 Dec 2024 17:28:35 +0800 Subject: [PATCH 4/7] SSA for loops, branches and memory access --- src/basic_vec.ml | 4 + src/mtype.ml | 10 +- src/primitive.ml | 8 - src/riscv_ssa.ml | 565 ++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 511 insertions(+), 76 deletions(-) diff --git a/src/basic_vec.ml b/src/basic_vec.ml index 441be4a..9461174 100644 --- a/src/basic_vec.ml +++ b/src/basic_vec.ml @@ -170,6 +170,10 @@ let push (d : 'a t) v = d.len <- d_len + 1; d.arr.!(d_len) <- v) +(** Similar to push, but for a whole vector. *) +let append vec other = + iter other (fun x -> push vec x) + let insert (d : 'a t) idx elt = let enlarge size = if size >= Sys.max_array_length then failwith "exceeds max_array_length"; diff --git a/src/mtype.ml b/src/mtype.ml index 51d8068..609acc4 100644 --- a/src/mtype.ml +++ b/src/mtype.ml @@ -20,9 +20,9 @@ module Lst = Basic_lst type id = string include struct - let _ = fun (_ : id) -> () + let sexp_of_id = (Moon_sexp_conv.sexp_of_string : id -> S.t) - let _ = sexp_of_id + end module Id_hash : Basic_hash_intf.S with type key = id = Basic_hash_string @@ -123,7 +123,6 @@ let is_numeric (t : t) = type field_name = Named of string | Indexed of int include struct - let _ = fun (_ : field_name) -> () let sexp_of_field_name = (function @@ -135,14 +134,11 @@ include struct S.List [ S.Atom "Indexed"; res0__034_ ] : field_name -> S.t) - let _ = sexp_of_field_name end type field_info = { field_type : t; name : field_name; mut : bool } include struct - let _ = fun (_ : field_info) -> () - let sexp_of_field_info = (fun { field_type = field_type__036_; name = name__038_; mut = mut__040_ } -> let bnds__035_ = ([] : _ Stdlib.List.t) in @@ -161,8 +157,6 @@ include struct in S.List bnds__035_ : field_info -> S.t) - - let _ = sexp_of_field_info end type constr_info = { payload : field_info list; tag : Tag.t } diff --git a/src/primitive.ml b/src/primitive.ml index 399e8c9..634af78 100644 --- a/src/primitive.ml +++ b/src/primitive.ml @@ -636,14 +636,6 @@ let prim_table = let find_prim (name : string) : prim option = Hash_string.find_opt prim_table name -let find_name prim = - let table = Basic_hash_gen.to_list prim_table in - let filtered = List.filter (fun (str, p) -> p = prim) table in - match filtered with - | [] -> None - | [(str, p)] -> Some str - | _ -> failwith "primitive.ml: duplicate intrinsics" - let is_intrinsic (p : prim) = match p with Pintrinsic _ -> true | _ -> false let is_pure (prim : prim) = diff --git a/src/riscv_ssa.ml b/src/riscv_ssa.ml index 9a058a2..a613c69 100644 --- a/src/riscv_ssa.ml +++ b/src/riscv_ssa.ml @@ -7,20 +7,15 @@ type var = { ty: Mtype.t; } +(* +We store all discarded values (e.g. unit) into variable of this name. +*) +let discard = "_" + let to_string (r: var) = Printf.sprintf "%s: %s" r.name (Mtype.to_string r.ty) -(** -Similar to R-type instructions in RISC-V, hence the name. -It consists of `rd` (destination) and two sources `rs1` and `rs2`. -*) -type r_type = { - rd: var; - rs1: var; - rs2: var; -} - (** Calls function named `fn` with args `args`, and store the result in `rd`. @@ -35,7 +30,7 @@ type call_data = { Lengths of immediates. Other lengths (like 16-bit short) are not supported currently. *) -type imm_type = Bit32 | Bit64 +type imm_type = Bit32 | Bit64 | Bit8 (** Assigns (un-)signed integer `imm` to `rd`. @@ -67,45 +62,87 @@ type assign_str = { imm: string; } -type fn = { +(** +Assigns `rs` to `rd`. + +These are both variables, as opposed to other assign_* types. +*) +type assign = { + rd: var; + rs: var; +} + +(** +Similar to `ld` and `st` in RISC-V. + +`rd` and `rs` have different meanings in loads and stores: +We load things from `rs` into `rd`, +and store things from `rd` into `rs`. +*) +type mem_access = { + rd: var; + rs: var; + offset: int; +} + +type phi = { + rd: var; + rs: (var * string) list; +} + +type malloc = { + rd: var; + size: int; +} + +(** +`entry` is the first basic block we'll encounter in this function. +*) +and fn = { fn: string; args: var list; body: t list; return: var; } -(** Instructions available in SSA. *) -and t = -| Add of r_type -| Sub of r_type -| Mul of r_type -| Div of r_type +and branch = { + cond: var; + ifso: string; + ifnot: string; +} + +(** +Instructions available in 3-address code and SSA. + +You might be surprised that there's no Add, Sub etc; +it's because they are all implemented as Call. + +In MoonBit core IR, they use calls to "primitive functions" +to realise this, and we do a direct translation. +*) +and t = | Call of call_data | AssignInt of assign_int | AssignFP of assign_fp | AssignStr of assign_str +| Assign of assign +| Load of mem_access +| Store of mem_access +| Jump of string +| Branch of branch +| Label of string +| Phi of phi | FnDecl of fn +| Malloc of malloc | Nop let to_string t = - let convert_rtype ty ({rd; rs1; rs2}: r_type) = - let rd_str = to_string rd in - let rs1_str = to_string rs1 in - let rs2_str = to_string rs2 in - Printf.sprintf "%s = %s %s, %s" rd_str ty rs1_str rs2_str - in - (** Deal with indentation inside functions. *) - let rec to_str_with_depth t depth = + let rec str t depth = String.make (depth * 2) ' ' ^ match t with - | Add r -> convert_rtype "add" r - | Sub r -> convert_rtype "sub" r - | Mul r -> convert_rtype "mul" r - | Div r -> convert_rtype "div" r - | Call { rd; fn; args } -> - let args_list = String.concat ", " (List.map to_string args) in + let args_list = String.concat ", " (List.map (fun x -> x.name) args) in Printf.sprintf "%s = call %s (%s)" (to_string rd) fn args_list | AssignInt { rd; imm; size; signed } -> @@ -114,29 +151,58 @@ let to_string t = | (Bit32, true) -> "" | (Bit64, true) -> "ll" | (Bit32, false) -> "u" - | (Bit64, false) -> "ull") + | (Bit64, false) -> "ull" + | (Bit8, true) -> "b" + | (Bit8, false) -> "ub") in Printf.sprintf "%s = %s%s" (to_string rd) (Int64.to_string imm) suffix | AssignFP { rd; imm; size; } -> let suffix = (match size with | Bit32 -> "f" - | Bit64 -> "") + | Bit64 -> "" + | _ -> failwith "riscv_ssa.ml: bad floating-point length") in - Printf.sprintf "%s = %s%s" (to_string rd) (Float.to_string imm) suffix + Printf.sprintf "%s = %f%s" (to_string rd) imm suffix | AssignStr { rd; imm; } -> Printf.sprintf "%s = \"%s\"" (to_string rd) imm + + | Assign { rd; rs; } -> + Printf.sprintf "%s = %s" (to_string rd) rs.name + + | Load { rd; rs; offset } -> + Printf.sprintf "%s = %s[offset = %d]" (to_string rd) rs.name offset + + | Store { rd; rs; offset } -> + Printf.sprintf "%s[offset = %d] = %s" rs.name offset rd.name + + | Jump target -> + Printf.sprintf "jump %s" target + + | Branch { cond; ifso; ifnot } -> + Printf.sprintf "br %s true:%s false:%s" cond.name ifso ifnot + + | Label label -> + Printf.sprintf "\n%s%s:" (String.make (depth * 2 - 2) ' ') label + + | Phi { rd; rs } -> + let rs_str = List.map (fun (r, label) -> Printf.sprintf "%s[%s]" r.name label) rs in + Printf.sprintf "%s = φ %s" (to_string rd) (String.concat " " rs_str) + + | Malloc { rd; size } -> + Printf.sprintf "%s = malloc %d" rd.name size | FnDecl { fn; args; body; return; } -> let args_str = String.concat ", " (List.map to_string args) in - let body_str = String.concat "\n" (List.map (fun t -> to_str_with_depth t (depth + 1)) body) in - let return_str = String.make (depth * 2 + 2) ' ' ^ "return " ^ (to_string return) in - Printf.sprintf "fn %s (%s) {\n%s\n%s\n}" fn args_str body_str return_str + let return_str = String.make (depth * 2 + 2) ' ' ^ "return " ^ return.name in + let body_str = String.concat "\n" (List.map (fun t -> str t (depth + 1)) body) in + + Printf.sprintf "fn %s (%s) {\n%s\n%s\n}\n" fn args_str body_str return_str | Nop -> "nop" - in to_str_with_depth t 0 + in str t 0 (** Counter of temporaries. *) let slot = ref 0 @@ -147,6 +213,13 @@ let new_temp ty = slot := !slot + 1; { name; ty } + +(** Construct a new label. *) +let new_label prefix = + let name = prefix ^ Int.to_string !slot in + slot := !slot + 1; + name + (** Currently I don't know what does `prim` ever mean in some places, so I ignore them in total. @@ -157,6 +230,70 @@ let warn prim = match prim with | None -> () | Some _ -> prerr_endline "warning: prim is not null" + +let offset_table = Hashtbl.create 64 +let size_table = Hashtbl.create 64 + +let offsetof name pos = Hashtbl.find offset_table (name, pos) + + +(** This assumes RISCV64. Perhaps support 32 as well in future? *) +let rec sizeof ty = + let pointer_size = 8 in + + match ty with + | Mtype.T_bool -> 1 + | Mtype.T_byte -> 1 + | Mtype.T_bytes -> pointer_size + | Mtype.T_char -> 1 + | Mtype.T_double -> 8 + | Mtype.T_float -> 4 + | Mtype.T_func _ -> pointer_size + | Mtype.T_int -> 4 + | Mtype.T_int64 -> 8 + | Mtype.T_string -> pointer_size + | Mtype.T_uint -> 4 + | Mtype.T_uint64 -> 8 + | Mtype.T_unit -> 0 + | Mtype.T_tuple { tys } -> List.fold_left (fun total x -> total + sizeof x) 0 tys + | Mtype.T_constr id -> Hashtbl.find size_table id + | _ -> failwith "riscv_ssa.ml: cannot calculate size" + + +(** +Calculate offset of fields in record types. +*) +let update_types ({ defs; _ }: Mtype.defs) = + let types = Mtype.Id_hash.to_list defs in + + let visit (name, info) = + match info with + | Mtype.Placeholder -> () + | Mtype.Externref -> () + | Mtype.Trait _ -> () + + | Mtype.Record { fields } -> + let extract (x: Mtype.field_info) = x.field_type in + let field_types = List.map extract fields in + let field_sizes = List.map sizeof field_types in + let offset = ref 0 in + let offsets = List.map (fun x -> let y = !offset in offset := x + !offset; y) field_sizes in + List.iteri (fun i x -> Hashtbl.add offset_table (name, i) x) offsets; + Hashtbl.add size_table name !offset + + | _ -> failwith "TODO: riscv_ssa.ml: cannot deal with this type" + in + List.iter visit types + +(** +This is reserved for `continue`s. +See the match case for `Cexpr_loop` in `do_convert` for more details. + +It represents a list of continue clauses, each with a list of arguments and a label, +marking where the argument comes from. +*) +let conts: (var * string) list list ref = ref [] + (** This function stores the SSA generated in the given argument `ssa`. @@ -164,27 +301,56 @@ It returns the variable in which the result of the last instruction pushed is st *) let rec do_convert ssa (expr: Mcore.expr) = match expr with + | Cexpr_unit _ -> + { name = discard; ty = Mtype.T_unit } + | Cexpr_var { id; ty; prim; _ } -> - warn prim; - { name = Ident.to_string id; ty } + warn prim; + + let variable = { name = Ident.to_string id; ty } in + + (* We treat mutables as pointers. *) + (match id with + | Pmutable_ident _ -> + let rd = new_temp ty in + Basic_vec.push ssa (Load { rd; rs = variable; offset = 0 }); + rd + + | _ -> variable); - (* - We treat primitives like special functions. + + (* Not quite sure about this; is it simply a variable access? *) + | Cexpr_object { self; } -> + do_convert ssa self - Since all primitives start with "%", - there is no risk that one of them coincide with user-defined functions. + (* + We treat primitives like special functions. + + TODO: Now their names are hard to read; perhaps tidy it up sometime. *) | Cexpr_prim { prim; args; ty; _ } -> let rd = new_temp ty in - let name = Primitive.find_name prim in let args = List.map (fun expr -> do_convert ssa expr) args in - let fn = match name with - | None -> failwith "riscv_ssa.ml: unrecognized intrinsics" - | Some x -> x - in + let fn = Primitive.sexp_of_prim prim |> S.to_string in Basic_vec.push ssa (Call { rd; fn; args }); rd + | Cexpr_let { name; rhs; body; _ } -> + let rs = do_convert ssa rhs in + (match name with + | Pmutable_ident _ -> + (* We use `bytes` to represent arbitrary pointers. *) + let space = new_temp Mtype.T_bytes in + let rd = { name = Ident.to_string name; ty = Mtype.T_bytes } in + Basic_vec.push ssa (Malloc { rd = space; size = sizeof rd.ty }); + Basic_vec.push ssa (Assign { rd; rs = space }); + Basic_vec.push ssa (Store { rd; rs; offset = 0 }); + + | _ -> + let rd = { name = Ident.to_string name; ty = rs.ty } in + Basic_vec.push ssa (Assign { rd; rs })); + do_convert ssa body + | Cexpr_apply { func; args; ty; prim; _ } -> warn prim; let rd = new_temp ty in @@ -193,13 +359,280 @@ let rec do_convert ssa (expr: Mcore.expr) = Basic_vec.push ssa (Call { rd; fn; args }); rd + | Cexpr_sequence { expr1; expr2; _ } -> + do_convert ssa expr1 |> ignore; + do_convert ssa expr2 + + (* Meaning: access the `pos`-th field of `record` *) + | Cexpr_field { record; accessor; pos; ty; _ } -> + let rd = new_temp ty in + let rs = do_convert ssa record in + + let name = + (match rs.ty with + | T_constr id -> id + | _ -> failwith "riscv_ssa.ml: currently unsupported record type") + in + + (match accessor with + | Label _ -> () + | _ -> failwith "riscv_ssa.ml: currently unsupported accessor"); + + let offset = offsetof name pos in + Basic_vec.push ssa (Load { rd; rs; offset; }); + rd + + (* Meaning: set the `pos`-th field of `record` to `field` *) + | Cexpr_mutate { record; pos; field } -> + let rs = do_convert ssa record in + let rd = do_convert ssa field in + + let name = + (match rs.ty with + | T_constr id -> id + | _ -> failwith "riscv_ssa.ml: currently unsupported record type") + in + + let offset = offsetof name pos in + Basic_vec.push ssa (Store { rd; rs; offset; }); + { name = discard; ty = Mtype.T_unit } + + | Cexpr_if { cond; ifso; ifnot; ty; _ } -> + let rd = new_temp ty in + + let cond = do_convert ssa cond in + + let ifso_ssa = Basic_vec.make ~dummy:Nop 20 in + let ifso_result = do_convert ifso_ssa ifso in + + let ifnot_ssa = Basic_vec.make ~dummy:Nop 20 in + let ifnot_result = + (match ifnot with + | None -> { name = discard; ty = Mtype.T_unit } + | Some x -> do_convert ifnot_ssa x + ) + in + + let ifso_label = new_label "ifso_" in + let ifnot_label = new_label "ifnot_" in + let ifexit_label = new_label "ifexit_" in + + (* + Compiling into: + + br %cond true:%ifso false:%ifnot + + ifso: + ... + jump ifexit + + ifnot: + ... + jump ifexit + + ifexit: + %rd = φ %ifso_result[ifso] %ifnot_result[ifnot] + *) + + Basic_vec.push ssa (Branch { cond; ifso = ifso_label; ifnot = ifnot_label }); + + Basic_vec.push ssa (Label ifso_label); + Basic_vec.append ssa ifso_ssa; + Basic_vec.push ssa (Jump ifexit_label); + + Basic_vec.push ssa (Label ifnot_label); + Basic_vec.append ssa ifnot_ssa; + Basic_vec.push ssa (Jump ifexit_label); + + Basic_vec.push ssa (Label ifexit_label); + Basic_vec.push ssa (Phi + { rd; rs = [(ifso_result, ifso_label); (ifnot_result, ifnot_label)] }); + + rd + + (* + In MoonBit core IR, loops are by default not looping. + They only jump to beginning when they meet `Cexpr_continue`, + in which case their `args` will be substituted by the `args` provided there, + and the loop entry condition will be tested again. + + Therefore the loop is compiled as follows: + + before: + # evaluate args + jump head + + loop: + %arg = φ %arg[before] %arg1[cont1] ... %argn[contn] + ... + jump exit + + exit: + + A good thing is that loops don't return a value. We don't need to insert + φ after the label `exit`. + *) + | Cexpr_loop { params; body; args; label; ty } -> + (* We need to use the global variable `conts`. *) + (* In case there's an outer loop, we might have tampered it; *) + (* So we must store the contents somewhere. *) + let old_conts = !conts in + + (* Get the labels *) + let loop = Printf.sprintf "%s_%d" label.name label.stamp in + let before = Printf.sprintf "before_%s" loop in + let exit = Printf.sprintf "exit_%s" loop in + + (* Generate body. `conts` will be filled by Cexpr_continue. *) + let body_ssa = Basic_vec.make ~dummy:Nop 32 in + let _ = do_convert body_ssa body in + + (* Start generating according to the template described above. *) + + (* Generate `before`. *) + + Basic_vec.push ssa (Jump before); + Basic_vec.push ssa (Label before); + let results = List.map (do_convert ssa) args in + let cont = List.map (fun x -> (x, before)) results in + conts := cont :: !conts; + + (* Calculate the φ-call. *) + + Basic_vec.push ssa (Jump loop); + Basic_vec.push ssa (Label loop); + + let rec transpose lst = + match lst with + | [] -> [] + | [] :: _ -> [] + | _ -> List.map List.hd lst :: transpose (List.map List.tl lst) + in + + let grouped = transpose !conts in + let gen_phi (par: Mcore.param) rs = + Phi { rd = { name = Ident.to_string par.binder; ty = par.ty }; rs } + in + + let phis = List.map2 gen_phi params grouped in + List.iter (fun x -> Basic_vec.push ssa x) phis; + + (* Generate rest parts. *) + + Basic_vec.append ssa body_ssa; + Basic_vec.push ssa (Jump exit); + Basic_vec.push ssa (Label exit); + + (* Store `conts` back; let outer loop go on normally. *) + conts := old_conts; + + { name = discard; ty = Mtype.T_unit } + + (* See the explanation for Cexpr_loop. *) + | Cexpr_continue { args; label } -> + (* Generate a label, and let the previous block jump to this block. *) + let cont = new_label "continue_" in + Basic_vec.push ssa (Jump cont); + Basic_vec.push ssa (Label cont); + + (* Evaluate arguments and update `conts`. *) + let results = List.map (do_convert ssa) args in + let new_cont = List.map (fun x -> (x, cont)) results in + conts := new_cont :: !conts; + + (* Jump back to the beginning of the loop. *) + let loop_name = Printf.sprintf "%s_%d" label.name label.stamp in + Basic_vec.push ssa (Jump loop_name); + + { name = discard; ty = Mtype.T_unit } + + (* Assigns mutable variables. *) + | Cexpr_assign { var; expr; ty } -> + let rd = do_convert ssa expr in + let rs = { name = Ident.to_string var; ty = Mtype.T_bytes} in + Basic_vec.push ssa (Store { rd; rs; offset = 0 }); + { name = discard; ty = Mtype.T_unit } + + (* Builds a record type. *) + | Cexpr_record { fields; ty; } -> + (* Allocate space for the record *) + let rd = new_temp Mtype.T_bytes in + Basic_vec.push ssa (Malloc { rd; size = sizeof ty }); + + let name = + (match ty with + | Mtype.T_constr id -> id + (* This must be a record *) + | _ -> assert false) + in + + (* Construct all its fields *) + let visit ({ pos; expr; _ }: Mcore.field_def) = + let result = do_convert ssa expr in + let offset = offsetof name pos in + Basic_vec.push ssa (Store { rd = result; rs = rd; offset }) + in + + List.iter visit fields; + rd + + | Cexpr_break _ -> + prerr_endline "break"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_return _ -> + prerr_endline "return"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_letfn _ -> + prerr_endline "letfn"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_function _ -> + prerr_endline "function"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_constr _ -> + prerr_endline "constr"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_letrec _ -> + prerr_endline "letrec"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_tuple _ -> + prerr_endline "tuple"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_record_update _ -> + prerr_endline "record_update"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_switch_constr _ -> + prerr_endline "switch constr"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_switch_constant _ -> + prerr_endline "switch constant"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_handle_error _ -> + prerr_endline "handle error"; + { name = discard; ty = Mtype.T_unit } + + | Cexpr_array _ -> + prerr_endline "array"; + { name = discard; ty = Mtype.T_unit } + | Cexpr_const { c; ty; _ } -> let rd = new_temp ty in let instruction = (match c with | C_string imm -> AssignStr { rd; imm; } | C_bool imm -> - AssignInt { rd; imm = Int64.of_int (if imm then 1 else 0); size = Bit32; signed = true; } + AssignInt { rd; imm = Int64.of_int (if imm then 1 else 0); size = Bit8; signed = true; } + | C_char imm -> + AssignInt { rd; imm = Int64.of_int (Uchar.to_int imm); size = Bit8; signed = false; } | C_int { v; _ } -> AssignInt { rd; imm = Int64.of_int32 v; size = Bit32; signed = true; } | C_int64 { v; _ } -> @@ -212,12 +645,13 @@ let rec do_convert ssa (expr: Mcore.expr) = AssignFP { rd; imm = v; size = Bit32; } | C_double { v; _ } -> AssignFP { rd; imm = v; size = Bit64; } - | _ -> failwith "TODO: riscv_ssa.ml: unsupported constant type" + | C_bytes { v; _ } -> + AssignStr { rd; imm = v } + (* BigInt; currently not supported *) + | _ -> failwith "TODO: riscv_ssa.ml: bigint not supported" ) in Basic_vec.push ssa instruction; - rd - - | _ -> failwith "TODO: riscv_ssa.ml: cannot deal with this expression" + rd (** Converts given `expr` into a list of SSA instructions, @@ -228,11 +662,10 @@ let convert_expr (expr: Mcore.expr) = let return = do_convert ssa expr in (Basic_vec.map_into_list ssa (fun x -> x), return) +(** We will only do this with *) let convert_toplevel (top: Mcore.top_item) = match top with - | Ctop_expr _ -> [] - - | Ctop_fn { binder; func; export_info_; loc_ } -> + | Ctop_fn { binder; func; export_info_; _ } -> let var_of_param ({ binder; ty; _ } : Mcore.param) = { name = Ident.to_string binder; ty } in @@ -244,11 +677,21 @@ let convert_toplevel (top: Mcore.top_item) = [ FnDecl { fn; args; body; return } ] + + (* + No need to deal with stubs. + They are just declarations of builtin functions, which we don't care - + since they don't carry anything about implementation. + *) + | Ctop_stub _ -> [] - | _ -> [] + | _ -> failwith "TODO: riscv_ssa.ml: don't know this toplevel" let ssa_of_mcore (core: Mcore.t) = Basic_io.write_s "core.ir" (Mcore.sexp_of_t core); + (* Look through types, and calculate their field offsets *) + update_types core.types; + (* Deal with other functions *) let body = List.map convert_toplevel core.body |> List.flatten in @@ -256,7 +699,9 @@ let ssa_of_mcore (core: Mcore.t) = let with_main = match core.main with | Some (main_expr, _) -> let (main_body, return) = convert_expr main_expr in - let main_decl = FnDecl { fn = "main"; args = []; body = main_body; return } in + let main_decl = + FnDecl { fn = "main"; args = []; body = main_body; return } + in main_decl :: body | None -> body From da1fb5bfce09bc0fc14bd4c09a51630dbf101363 Mon Sep 17 00:00:00 2001 From: AdUhTkJm <2292398666@qq.com> Date: Sun, 22 Dec 2024 10:24:13 +0800 Subject: [PATCH 5/7] Implement some of intrinsics --- src/ast_derive.ml | 2 +- src/basic_lst.ml | 163 +++------------------------ src/driver_util.ml | 2 +- src/placeholder_env.ml | 2 +- src/riscv.ml | 41 +++++-- src/riscv_reg.ml | 138 ----------------------- src/riscv_ssa.ml | 248 ++++++++++++++++++++++++++++++++++++----- src/typer.ml | 2 +- 8 files changed, 264 insertions(+), 334 deletions(-) delete mode 100644 src/riscv_reg.ml diff --git a/src/ast_derive.ml b/src/ast_derive.ml index 65ff851..9520d81 100644 --- a/src/ast_derive.ml +++ b/src/ast_derive.ml @@ -1329,7 +1329,7 @@ let derive_from_json (trait : Syntax.type_name) (decl : Syntax.type_decl) in (S.pmap pattern, expr)) in - Lst.append_one map_cases err_case |> S.match_ json + List.append map_cases [err_case] |> S.match_ json | Ptd_record fields -> let vars = Lst.map fields (fun _ -> fresh_name "field") in let pattern = diff --git a/src/basic_lst.ml b/src/basic_lst.ml index c31deb5..6e66553 100644 --- a/src/basic_lst.ml +++ b/src/basic_lst.ml @@ -29,9 +29,9 @@ module Unsafe_external = Basic_unsafe_external module Arr = Basic_arr open Unsafe_external -let rec map l f = List.map f l +let map l f = List.map f l -let rec has_string (l : string list) query = List.mem query l +let has_string (l : string list) query = List.mem query l let rec map_combine l1 l2 f = match (l1, l2) with @@ -87,62 +87,15 @@ let rec map_split_opt (xs : 'a list) (f : 'a -> 'b option * 'c option) : ( (match c with Some c -> c :: cs | None -> cs), match d with Some d -> d :: ds | None -> ds )) -let rec map_snd l f = - match l with - | [] -> [] - | (v1, x1) :: [] -> - let y1 = f x1 in - [ (v1, y1) ] - | [ (v1, x1); (v2, x2) ] -> - let y1 = f x1 in - let y2 = f x2 in - [ (v1, y1); (v2, y2) ] - | [ (v1, x1); (v2, x2); (v3, x3) ] -> - let y1 = f x1 in - let y2 = f x2 in - let y3 = f x3 in - [ (v1, y1); (v2, y2); (v3, y3) ] - | [ (v1, x1); (v2, x2); (v3, x3); (v4, x4) ] -> - let y1 = f x1 in - let y2 = f x2 in - let y3 = f x3 in - let y4 = f x4 in - [ (v1, y1); (v2, y2); (v3, y3); (v4, y4) ] - | (v1, x1) :: (v2, x2) :: (v3, x3) :: (v4, x4) :: (v5, x5) :: tail -> - let y1 = f x1 in - let y2 = f x2 in - let y3 = f x3 in - let y4 = f x4 in - let y5 = f x5 in - (v1, y1) :: (v2, y2) :: (v3, y3) :: (v4, y4) :: (v5, y5) :: map_snd tail f +let map_snd l f = List.map (fun (a, b) -> (a, f b)) l let rec map_last l f = match l with | [] -> [] | x1 :: [] -> - let y1 = f true x1 in - [ y1 ] - | [ x1; x2 ] -> - let y1 = f false x1 in - let y2 = f true x2 in - [ y1; y2 ] - | [ x1; x2; x3 ] -> - let y1 = f false x1 in - let y2 = f false x2 in - let y3 = f true x3 in - [ y1; y2; y3 ] - | [ x1; x2; x3; x4 ] -> - let y1 = f false x1 in - let y2 = f false x2 in - let y3 = f false x3 in - let y4 = f true x4 in - [ y1; y2; y3; y4 ] - | x1 :: x2 :: x3 :: x4 :: tail -> - let y1 = f false x1 in - let y2 = f false x2 in - let y3 = f false x3 in - let y4 = f false x4 in - y1 :: y2 :: y3 :: y4 :: map_last tail f + let y1 = f true x1 in [ y1 ] + | x1 :: tail -> + let y1 = f false x1 in y1 :: map_last tail f let rec mapi_aux lst i f tail = match lst with @@ -151,7 +104,7 @@ let rec mapi_aux lst i f tail = let r = f i a in r :: mapi_aux l (i + 1) f tail -let mapi lst f = mapi_aux lst 0 f [] +let mapi lst f = List.mapi f lst let mapi_append lst f tail = mapi_aux lst 0 f tail let rec last xs = @@ -160,20 +113,6 @@ let rec last xs = | _ :: tl -> last tl | [] -> invalid_arg __FUNCTION__ -let rec append_aux l1 l2 = - match l1 with - | [] -> l2 - | a0 :: [] -> a0 :: l2 - | [ a0; a1 ] -> a0 :: a1 :: l2 - | [ a0; a1; a2 ] -> a0 :: a1 :: a2 :: l2 - | [ a0; a1; a2; a3 ] -> a0 :: a1 :: a2 :: a3 :: l2 - | [ a0; a1; a2; a3; a4 ] -> a0 :: a1 :: a2 :: a3 :: a4 :: l2 - | a0 :: a1 :: a2 :: a3 :: a4 :: rest -> - a0 :: a1 :: a2 :: a3 :: a4 :: append_aux rest l2 - -let append l1 l2 = match l2 with [] -> l1 | _ -> append_aux l1 l2 -let append_one l1 x = append_aux l1 [ x ] - let rec map_append l1 l2 f = match l1 with | [] -> l2 @@ -556,83 +495,15 @@ let rec rev_iter l f = match l with | [] -> () | x1 :: [] -> f x1 - | [ x1; x2 ] -> - f x2; - f x1 - | [ x1; x2; x3 ] -> - f x3; - f x2; - f x1 - | [ x1; x2; x3; x4 ] -> - f x4; - f x3; - f x2; - f x1 - | x1 :: x2 :: x3 :: x4 :: x5 :: tail -> + | x1 :: tail -> rev_iter tail f; - f x5; - f x4; - f x3; - f x2; f x1 -let rec iter l f = - match l with - | [] -> () - | x1 :: [] -> f x1 - | [ x1; x2 ] -> - f x1; - f x2 - | [ x1; x2; x3 ] -> - f x1; - f x2; - f x3 - | [ x1; x2; x3; x4 ] -> - f x1; - f x2; - f x3; - f x4 - | x1 :: x2 :: x3 :: x4 :: x5 :: tail -> - f x1; - f x2; - f x3; - f x4; - f x5; - iter tail f - -let rec iteri_aux l f i = - match l with - | [] -> () - | x1 :: [] -> f i x1 - | [ x1; x2 ] -> - f i x1; - f (i + 1) x2 - | [ x1; x2; x3 ] -> - f i x1; - f (i + 1) x2; - f (i + 2) x3 - | [ x1; x2; x3; x4 ] -> - f i x1; - f (i + 1) x2; - f (i + 2) x3; - f (i + 3) x4 - | x1 :: x2 :: x3 :: x4 :: x5 :: tail -> - f i x1; - f (i + 1) x2; - f (i + 2) x3; - f (i + 3) x4; - f (i + 4) x5; - iteri_aux tail f (i + 5) +let iter l f = List.iter f l -let iteri l f = iteri_aux l f 0 +let iteri l f = List.iteri f l -let rec iter2 l1 l2 f = - match (l1, l2) with - | [], [] -> () - | a1 :: l1, a2 :: l2 -> - f a1 a2; - iter2 l1 l2 f - | _, _ -> invalid_arg __FUNCTION__ +let iter2 l1 l2 f = List.iter2 f l1 l2 let rec for_all lst p = match lst with [] -> true | a :: l -> p a && for_all l p @@ -750,13 +621,6 @@ let rec assoc_by_opt lst comp k = let assoc_str lst str = assoc_by_opt lst String.equal str let assoc_str_exn lst str = assoc_by_string lst str None -let rec nth_aux l n = - match l with - | [] -> None - | a :: l -> if n = 0 then Some a else nth_aux l (n - 1) - -let nth_opt l n = if n < 0 then None else nth_aux l n - let rec iter_snd lst f = match lst with | [] -> () @@ -780,10 +644,9 @@ let rec exists_snd l p = match l with [] -> false | (_, a) :: l -> p a || exists_snd l p let rec concat_append (xss : 'a list list) (xs : 'a list) : 'a list = - match xss with [] -> xs | l :: r -> append l (concat_append r xs) + match xss with [] -> xs | l :: r -> List.append l (concat_append r xs) -let rec fold_left l accu f = - match l with [] -> accu | a :: l -> fold_left l (f accu a) f +let fold_left l init f = List.fold_left f init l let reduce_from_left lst fn = match lst with diff --git a/src/driver_util.ml b/src/driver_util.ml index 1459657..3e828d2 100644 --- a/src/driver_util.ml +++ b/src/driver_util.ml @@ -245,7 +245,7 @@ let wasm_gen ~(elim_unused_let : bool) (core : Mcore.t) ~clam_callback = let riscv_gen (core : Mcore.t) = core |> Riscv_ssa.ssa_of_mcore - |> Riscv.regalloc + |> Riscv.generate let link_core ~(shrink_wasm : bool) ~(elim_unused_let : bool) ~(core_inputs : core_input Basic_vec.t) diff --git a/src/placeholder_env.ml b/src/placeholder_env.ml index ec62a1b..fb9beba 100644 --- a/src/placeholder_env.ml +++ b/src/placeholder_env.ml @@ -132,7 +132,7 @@ let make ~foreign_types ~type_defs ~trait_defs = in add_trait name; let object_safety_status = - Lst.append + List.append (Hash_string.find_exn object_safety_of_traits name) (Vec.map_into_list not_object_safe_supers (fun super -> Trait_decl.Bad_super_trait super)) diff --git a/src/riscv.ml b/src/riscv.ml index cbd56fd..76f5f0d 100644 --- a/src/riscv.ml +++ b/src/riscv.ml @@ -1,24 +1,41 @@ (* RISC-V assembly commands. *) -module Reg = Riscv_reg +let registers = [| + (* Int registers *) + "zero"; "ra"; "sp"; "gp"; "tp"; + "t0"; "t1"; "t2"; "fp"; "s1"; + "a0"; "a1"; "a2"; "a3"; "a4"; + "a5"; "a6"; "a7"; "s2"; "s3"; + "s4"; "s5"; "s6"; "s7"; "s8"; + "s9"; "s10"; "s11"; "t3"; "t4"; + "t5"; "t6"; -type label = string + (* FP registers *) + "ft0"; "ft1"; "ft2"; "ft3"; "ft4"; + "ft5"; "ft6"; "ft7"; "fs0"; "fs1"; + "fa0"; "fa1"; "fa2"; "fa3"; "fa4"; + "fa5"; "fa6"; "fa7"; "fs2"; "fs3"; + "fs4"; "fs5"; "fs6"; "fs7"; "fs8"; + "fs9"; "fs10"; "fs11"; "ft8"; "ft9"; + "ft10"; "ft11"; +|] type t = -| Add of Reg.t * Reg.t * Reg.t -| Sub of Reg.t * Reg.t * Reg.t -| Mul of Reg.t * Reg.t * Reg.t -| Div of Reg.t * Reg.t * Reg.t -| Call of label -| Label of label +| Add of int * int * int +| Sub of int * int * int +| Mul of int * int * int +| Div of int * int * int +| Call of string +| Label of string let to_string asm = let convert_3reg ty rd rs1 rs2 = - let rd_str = Reg.to_string rd in - let rs1_str = Reg.to_string rs1 in - let rs2_str = Reg.to_string rs2 in + let rd_str = registers.(rd) in + let rs1_str = registers.(rs1) in + let rs2_str = registers.(rs2) in Printf.sprintf "%s %s, %s, %s" ty rd_str rs1_str rs2_str in + match asm with | Add (rd, rs1, rs2) -> convert_3reg "add" rd rs1 rs2 | Sub (rd, rs1, rs2) -> convert_3reg "sub" rd rs1 rs2 @@ -37,4 +54,4 @@ let to_asm_string asm = | Label _ -> to_string asm | _ -> " " ^ to_string asm -let regalloc ssa = [] \ No newline at end of file +let generate ssa = [] \ No newline at end of file diff --git a/src/riscv_reg.ml b/src/riscv_reg.ml deleted file mode 100644 index 7627734..0000000 --- a/src/riscv_reg.ml +++ /dev/null @@ -1,138 +0,0 @@ -(* This type consists of all 32 integer registers and 32 FP registers of RISC-V. *) -(* It also contains 2 special marks about spilt registers. *) -type t = -| Zero -| Ra -| Sp -| Gp -| Tp -| T0 -| T1 -| T2 -| Fp -| S1 -| A0 -| A1 -| A2 -| A3 -| A4 -| A5 -| A6 -| A7 -| S2 -| S3 -| S4 -| S5 -| S6 -| S7 -| S8 -| S9 -| S10 -| S11 -| T3 -| T4 -| T5 -| T6 -| Ft0 -| Ft1 -| Ft2 -| Ft3 -| Ft4 -| Ft5 -| Ft6 -| Ft7 -| Fs0 -| Fs1 -| Fa0 -| Fa1 -| Fa2 -| Fa3 -| Fa4 -| Fa5 -| Fa6 -| Fa7 -| Fs2 -| Fs3 -| Fs4 -| Fs5 -| Fs6 -| Fs7 -| Fs8 -| Fs9 -| Fs10 -| Fs11 -| Ft8 -| Ft9 -| Ft10 -| Ft11 -| Spilt of int (* This integer is offset from stack pointer. *) -| SpiltFP of int - -let to_string (t: t) = - match t with - | Zero -> "zero" - | Ra -> "ra" - | Sp -> "sp" - | Gp -> "gp" - | Tp -> "tp" - | T0 -> "t0" - | T1 -> "t1" - | T2 -> "t2" - | Fp -> "fp" - | S1 -> "s1" - | A0 -> "a0" - | A1 -> "a1" - | A2 -> "a2" - | A3 -> "a3" - | A4 -> "a4" - | A5 -> "a5" - | A6 -> "a6" - | A7 -> "a7" - | S2 -> "s2" - | S3 -> "s3" - | S4 -> "s4" - | S5 -> "s5" - | S6 -> "s6" - | S7 -> "s7" - | S8 -> "s8" - | S9 -> "s9" - | S10 -> "s10" - | S11 -> "s11" - | T3 -> "t3" - | T4 -> "t4" - | T5 -> "t5" - | T6 -> "t6" - | Ft0 -> "ft0" - | Ft1 -> "ft1" - | Ft2 -> "ft2" - | Ft3 -> "ft3" - | Ft4 -> "ft4" - | Ft5 -> "ft5" - | Ft6 -> "ft6" - | Ft7 -> "ft7" - | Fs0 -> "fs0" - | Fs1 -> "fs1" - | Fa0 -> "fa0" - | Fa1 -> "fa1" - | Fa2 -> "fa2" - | Fa3 -> "fa3" - | Fa4 -> "fa4" - | Fa5 -> "fa5" - | Fa6 -> "fa6" - | Fa7 -> "fa7" - | Fs2 -> "fs2" - | Fs3 -> "fs3" - | Fs4 -> "fs4" - | Fs5 -> "fs5" - | Fs6 -> "fs6" - | Fs7 -> "fs7" - | Fs8 -> "fs8" - | Fs9 -> "fs9" - | Fs10 -> "fs10" - | Fs11 -> "fs11" - | Ft8 -> "ft8" - | Ft9 -> "ft9" - | Ft10 -> "ft10" - | Ft11 -> "ft11" - | Spilt x -> "spilt." ^ Int.to_string x - | SpiltFP x -> "spilt.d." ^ Int.to_string x \ No newline at end of file diff --git a/src/riscv_ssa.ml b/src/riscv_ssa.ml index a613c69..84cd502 100644 --- a/src/riscv_ssa.ml +++ b/src/riscv_ssa.ml @@ -15,6 +15,18 @@ let discard = "_" let to_string (r: var) = Printf.sprintf "%s: %s" r.name (Mtype.to_string r.ty) +(** Similar to R-type instructions in RISC-V. *) +type r_type = { + rd: var; + rs1: var; + rs2: var; +} + +(** R-type, but only one operand. *) +type r2_type = { + rd: var; + rs1: var; +} (** Calls function named `fn` with args `args`, @@ -102,7 +114,6 @@ and fn = { fn: string; args: var list; body: t list; - return: var; } and branch = { @@ -113,14 +124,36 @@ and branch = { (** Instructions available in 3-address code and SSA. - -You might be surprised that there's no Add, Sub etc; -it's because they are all implemented as Call. - -In MoonBit core IR, they use calls to "primitive functions" -to realise this, and we do a direct translation. *) and t = +(* Arithmetic operations *) +| Add of r_type +| Sub of r_type +| Mul of r_type +| Div of r_type +| Mod of r_type +| Less of r_type +| Leq of r_type +| Great of r_type +| Geq of r_type +| Eq of r_type +| Neq of r_type +| Neg of r2_type + +(* Floating point operations *) +| FAdd of r_type +| FSub of r_type +| FMul of r_type +| FDiv of r_type +| FLess of r_type +| FLeq of r_type +| FGreat of r_type +| FGeq of r_type +| FEq of r_type +| FNeq of r_type +| FNeg of r2_type + +(* Others *) | Call of call_data | AssignInt of assign_int | AssignFP of assign_fp @@ -134,13 +167,43 @@ and t = | Phi of phi | FnDecl of fn | Malloc of malloc +| Return of var | Nop let to_string t = + let rtype op ({ rd; rs1; rs2 }: r_type) = + Printf.sprintf "%s = %s %s %s" (to_string rd) rs1.name op rs2.name + in + (** Deal with indentation inside functions. *) let rec str t depth = String.make (depth * 2) ' ' ^ match t with + | Add r -> rtype "+" r + | Sub r -> rtype "-" r + | Mul r -> rtype "*" r + | Div r -> rtype "/" r + | Mod r -> rtype "mod" r + | Less r -> rtype "<" r + | Leq r -> rtype "<=" r + | Great r -> rtype ">" r + | Geq r -> rtype ">=" r + | Eq r -> rtype "==" r + | Neq r -> rtype "!=" r + | Neg { rd; rs1 } -> Printf.sprintf "%s = -%s" (to_string rd) rs1.name + + | FAdd r -> rtype "+." r + | FSub r -> rtype "-." r + | FMul r -> rtype "*." r + | FDiv r -> rtype "/." r + | FLess r -> rtype "<." r + | FLeq r -> rtype "<=." r + | FGreat r -> rtype ">." r + | FGeq r -> rtype ">=." r + | FEq r -> rtype "==." r + | FNeq r -> rtype "!=." r + | FNeg { rd; rs1 } -> Printf.sprintf "%s = -%s" (to_string rd) rs1.name + | Call { rd; fn; args } -> let args_list = String.concat ", " (List.map (fun x -> x.name) args) in Printf.sprintf "%s = call %s (%s)" (to_string rd) fn args_list @@ -193,12 +256,14 @@ let to_string t = | Malloc { rd; size } -> Printf.sprintf "%s = malloc %d" rd.name size - | FnDecl { fn; args; body; return; } -> + | FnDecl { fn; args; body; } -> let args_str = String.concat ", " (List.map to_string args) in - let return_str = String.make (depth * 2 + 2) ' ' ^ "return " ^ return.name in let body_str = String.concat "\n" (List.map (fun t -> str t (depth + 1)) body) in - Printf.sprintf "fn %s (%s) {\n%s\n%s\n}\n" fn args_str body_str return_str + Printf.sprintf "fn %s (%s) {\n%s\n}\n" fn args_str body_str + + | Return var -> + Printf.sprintf "return %s" var.name | Nop -> "nop" @@ -260,9 +325,135 @@ let rec sizeof ty = | _ -> failwith "riscv_ssa.ml: cannot calculate size" -(** -Calculate offset of fields in record types. -*) +(** The variable defined in the instruction. *) +let def t = match t with +| Add { rd; _ } -> [rd] +| Sub { rd; _ } -> [rd] +| Mul { rd; _ } -> [rd] +| Div { rd; _ } -> [rd] +| Mod { rd; _ } -> [rd] +| Less { rd; _ } -> [rd] +| Leq { rd; _ } -> [rd] +| Great { rd; _ } -> [rd] +| Geq { rd; _ } -> [rd] +| Eq { rd; _ } -> [rd] +| Neq { rd; _ } -> [rd] +| Neg { rd; _ } -> [rd] +| FAdd { rd; _ } -> [rd] +| FSub { rd; _ } -> [rd] +| FMul { rd; _ } -> [rd] +| FDiv { rd; _ } -> [rd] +| FLess { rd; _ } -> [rd] +| FLeq { rd; _ } -> [rd] +| FGreat { rd; _ } -> [rd] +| FGeq { rd; _ } -> [rd] +| FEq { rd; _ } -> [rd] +| FNeq { rd; _ } -> [rd] +| FNeg { rd; _ } -> [rd] +| Call { rd; _ } -> [rd] +| AssignInt { rd; _ } -> [rd] +| AssignFP { rd; _ } -> [rd] +| AssignStr { rd; _ } -> [rd] +| Assign { rd; _ } -> [rd] +| Load { rd; _ } -> [rd] +| Store _ -> [] +| Jump _ -> [] +| Branch _ -> [] +| Label _ -> [] +| Phi { rd; _ } -> [rd] +| FnDecl _ -> [] +| Malloc { rd; _ } -> [rd] +| Return _ -> [] +| Nop -> [] + +(** Variables that has been accessed in this instruction. *) +let use t = match t with +| Add { rs1; rs2; _ } -> [rs1; rs2] +| Sub { rs1; rs2; _ } -> [rs1; rs2] +| Mul { rs1; rs2; _ } -> [rs1; rs2] +| Div { rs1; rs2; _ } -> [rs1; rs2] +| Mod { rs1; rs2; _ } -> [rs1; rs2] +| Less { rs1; rs2; _ } -> [rs1; rs2] +| Leq { rs1; rs2; _ } -> [rs1; rs2] +| Great { rs1; rs2; _ } -> [rs1; rs2] +| Geq { rs1; rs2; _ } -> [rs1; rs2] +| Eq { rs1; rs2; _ } -> [rs1; rs2] +| Neq { rs1; rs2; _ } -> [rs1; rs2] +| Neg { rs1; _ } -> [rs1] +| FAdd { rs1; rs2; _ } -> [rs1; rs2] +| FSub { rs1; rs2; _ } -> [rs1; rs2] +| FMul { rs1; rs2; _ } -> [rs1; rs2] +| FDiv { rs1; rs2; _ } -> [rs1; rs2] +| FLess { rs1; rs2; _ } -> [rs1; rs2] +| FLeq { rs1; rs2; _ } -> [rs1; rs2] +| FGreat { rs1; rs2; _ } -> [rs1; rs2] +| FGeq { rs1; rs2; _ } -> [rs1; rs2] +| FEq { rs1; rs2; _ } -> [rs1; rs2] +| FNeq { rs1; rs2; _ } -> [rs1; rs2] +| FNeg { rs1; _ } -> [rs1] +| Call { args; _ } -> args +| AssignInt _ -> [] +| AssignFP _ -> [] +| AssignStr _ -> [] +| Assign { rs; _ } -> [rs] +| Load { rs; _ } -> [rs] +| Store { rd; rs; _ } -> [rd; rs] +| Jump _ -> [] +| Branch { cond; } -> [cond] +| Label _ -> [] +| Phi { rs } -> List.map (fun (var, label) -> var) rs +| FnDecl _ -> [] +| Malloc _ -> [] +| Return var -> [var] +| Nop -> [] + + +(** Push the correct sequence of instruction based on primitives. *) +let deal_with_prim ssa rd (prim: Primitive.prim) args = + let die () = + failwith "riscv_ssa.ml: bad primitive format" + in + + match prim with + | Pcomparison { operand_type; operator } -> + let is_fp = (operand_type = F32 || operand_type = F64) in + let op = (match is_fp, operator, args with + | false, Lt, [rs1; rs2] -> (Less { rd; rs1; rs2 }) + | true, Lt, [rs1; rs2] -> (FLess { rd; rs1; rs2 }) + | false, Gt, [rs1; rs2] -> (Great { rd; rs1; rs2 }) + | true, Gt, [rs1; rs2] -> (FGreat { rd; rs1; rs2 }) + | false, Ne, [rs1; rs2] -> (Neq { rd; rs1; rs2 }) + | true, Ne, [rs1; rs2] -> (FNeq { rd; rs1; rs2 }) + | false, Eq, [rs1; rs2] -> (Eq { rd; rs1; rs2 }) + | true, Eq, [rs1; rs2] -> (FEq { rd; rs1; rs2 }) + | false, Le, [rs1; rs2] -> (Leq { rd; rs1; rs2 }) + | true, Le, [rs1; rs2] -> (FLeq { rd; rs1; rs2 }) + | false, Ge, [rs1; rs2] -> (Geq { rd; rs1; rs2 }) + | true, Ge, [rs1; rs2] -> (FGeq { rd; rs1; rs2 }) + | _ -> die ()) in + Basic_vec.push ssa op + + | Parith { operand_type; operator } -> + let is_fp = (operand_type = F32 || operand_type = F64) in + let op = (match is_fp, operator, args with + | false, Add, [rs1; rs2] -> (Add { rd; rs1; rs2 }) + | true, Add, [rs1; rs2] -> (FAdd { rd; rs1; rs2 }) + | false, Sub, [rs1; rs2] -> (Sub { rd; rs1; rs2 }) + | true, Sub, [rs1; rs2] -> (FSub { rd; rs1; rs2 }) + | false, Mul, [rs1; rs2] -> (Mul { rd; rs1; rs2 }) + | true, Mul, [rs1; rs2] -> (FMul { rd; rs1; rs2 }) + | false, Div, [rs1; rs2] -> (Div { rd; rs1; rs2 }) + | true, Div, [rs1; rs2] -> (FDiv { rd; rs1; rs2 }) + | false, Mod, [rs1; rs2] -> (Mod { rd; rs1; rs2 }) + | false, Neg, [rs1] -> (Neg { rd; rs1 }) + | true, Neg, [rs1] -> (FNeg { rd; rs1 }) + | _ -> die ()) in + Basic_vec.push ssa op + + | _ -> Basic_vec.push ssa (Call { rd; fn = (Primitive.sexp_of_prim prim |> S.to_string); args }) + + +(** Calculate offset of fields in record types. *) let update_types ({ defs; _ }: Mtype.defs) = let types = Mtype.Id_hash.to_list defs in @@ -323,16 +514,14 @@ let rec do_convert ssa (expr: Mcore.expr) = | Cexpr_object { self; } -> do_convert ssa self - (* - We treat primitives like special functions. - - TODO: Now their names are hard to read; perhaps tidy it up sometime. - *) + (* Primitives are intrinsic functions. *) + (* We tidy some of these up, and compile others into functions. *) | Cexpr_prim { prim; args; ty; _ } -> let rd = new_temp ty in let args = List.map (fun expr -> do_convert ssa expr) args in - let fn = Primitive.sexp_of_prim prim |> S.to_string in - Basic_vec.push ssa (Call { rd; fn; args }); + (* TODO: take special care with Psequand and Psequor. *) + (* They are short-circuited and should be compiled into if-else. *) + deal_with_prim ssa rd prim args; rd | Cexpr_let { name; rhs; body; _ } -> @@ -371,12 +560,12 @@ let rec do_convert ssa (expr: Mcore.expr) = let name = (match rs.ty with | T_constr id -> id - | _ -> failwith "riscv_ssa.ml: currently unsupported record type") + | _ -> failwith "TODO: riscv_ssa.ml: currently unsupported record type") in (match accessor with | Label _ -> () - | _ -> failwith "riscv_ssa.ml: currently unsupported accessor"); + | _ -> failwith "TODO: riscv_ssa.ml: currently unsupported accessor"); let offset = offsetof name pos in Basic_vec.push ssa (Load { rd; rs; offset; }); @@ -390,7 +579,7 @@ let rec do_convert ssa (expr: Mcore.expr) = let name = (match rs.ty with | T_constr id -> id - | _ -> failwith "riscv_ssa.ml: currently unsupported record type") + | _ -> failwith "riscv_ssa.ml: can only mutate record types") in let offset = offsetof name pos in @@ -660,7 +849,8 @@ along with the variable in which the result of this expression is stored. let convert_expr (expr: Mcore.expr) = let ssa = Basic_vec.make ~dummy:Nop 20 in let return = do_convert ssa expr in - (Basic_vec.map_into_list ssa (fun x -> x), return) + Basic_vec.push ssa (Return return); + Basic_vec.map_into_list ssa (fun x -> x) (** We will only do this with *) let convert_toplevel (top: Mcore.top_item) = @@ -671,11 +861,11 @@ let convert_toplevel (top: Mcore.top_item) = in let fn = Ident.to_string binder in let args = List.map var_of_param func.params in - let (body, return) = convert_expr func.body in + let body = convert_expr func.body in if export_info_ != None then prerr_endline "warning: export info is non-empty"; [ - FnDecl { fn; args; body; return } + FnDecl { fn; args; body } ] (* @@ -698,10 +888,8 @@ let ssa_of_mcore (core: Mcore.t) = (* Deal with main *) let with_main = match core.main with | Some (main_expr, _) -> - let (main_body, return) = convert_expr main_expr in - let main_decl = - FnDecl { fn = "main"; args = []; body = main_body; return } - in + let main_body = convert_expr main_expr in + let main_decl = FnDecl { fn = "main"; args = []; body = main_body } in main_decl :: body | None -> body diff --git a/src/typer.ml b/src/typer.ml index c21f48d..b759c6a 100644 --- a/src/typer.ml +++ b/src/typer.ml @@ -757,7 +757,7 @@ let rec infer_expr (env : Local_env.t) (expr : Syntax.expr) Type.filter_product ~blame:Filtered_type ~arity:None ty_tuple loc_ with | Ok tys -> ( - match Lst.nth_opt tys index with + match List.nth_opt tys index with | Some ty -> ty | None -> add_error diagnostics From 09ba122a210ef9edeaea15852b5b0bedd1ec54fc Mon Sep 17 00:00:00 2001 From: AdUhTkJm <2292398666@qq.com> Date: Sun, 22 Dec 2024 13:32:35 +0800 Subject: [PATCH 6/7] Update documentation and tidy up --- README.md | 10 +-- README.zh.md | 6 +- src/riscv_opt.ml | 176 ++++++++++++++++++++++++++++++++++++++++++++++ src/riscv_ssa.ml | 179 ++++++++++++++++++++--------------------------- 4 files changed, 263 insertions(+), 108 deletions(-) create mode 100644 src/riscv_opt.ml diff --git a/README.md b/README.md index 1c0abfb..aaa88d0 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ You would also need to build the core library, as instructed in the following se ### Usage -MoonBit's core library is typically installed in `~/.moon/lib/core/`. In following commands, we use `$core` to denote the path. The language is shipped with pre-built libraries under different targets: `js`, `wasm` and `wasm-gc`; however, this compiler currently supports only `wasm-gc`. Let `$target` stand for this value. +MoonBit's core library is typically installed in `~/.moon/lib/core/`. In following commands, we use `$core` to denote the path. You can choose your target between `riscv` and `wasm-gc`, which we denote by `$target`. Currently, `riscv` will only produce a `.ssa` file for static single assignment IR, and does not proceed to generate assembly. We use `$src` to denote the path to your main package. This package must contain, along with your source files, a `moon.pkg.json`; if you're not sure how this works, you can use [moon](https://github.com/moonbitlang/moon) to initialize a MoonBit repository. @@ -67,15 +67,17 @@ moon bundle --source-dir $core We strongly recommend that you build the core library yourself via the commands above. The pre-built binaries are not always compatible with this compiler, as MoonBit is still under development. -You should verify that there is a folder called `wasm-gc` under `$core/target`. +You should verify that now there is a folder called `wasm-gc` under `$core/target`. Now you can compile `.mbt` files with these commands: ```bash -bundled=$core/target/$target/release/bundle +# Even if you are targeting RISC-V, you can still use this path. +# That's because it's intermediate representation (IR) in the bundle; +# it is ignorant of target. +bundled=$core/target/wasm-gc/release/bundle # Here, main.mbt should be a file containing `fn main`. -# `build-package` produces intermediate representation (IR); it is ignorant of target. moonc build-package $src/main.mbt -is-main -std-path $bundled -o $obj # If you have more than one package, remember to include all of them in -pkg-sources. They should be separated by colon ':'. diff --git a/README.zh.md b/README.zh.md index a235198..3d4c37d 100644 --- a/README.zh.md +++ b/README.zh.md @@ -41,7 +41,7 @@ dune build -p moonbit-lang ### 使用 -MoonBit 的核心库一般安装在 `~/.moon/lib/core` 下。在下面的命令中,我们会用 `$core` 表示核心库的安装路径。在 `$core/target` 下,有 `js`, `wasm` 和 `wasm-gc` 这三个文件夹,它们包含在对应目标下编译好的核心库。我们用 `$target` 表示这三者之一。 +MoonBit 的核心库一般安装在 `~/.moon/lib/core` 下。在下面的命令中,我们会用 `$core` 表示核心库的安装路径。你可以选择 `riscv` 或 `wasm-gc` 作为编译目标,我们用 `$target` 表示这两者之一。值得注意的是,目前 `riscv` 只会产生 SSA 文件,而不会产生汇编代码。 `$src` 表示源代码的路径;在这个文件夹下,除了源代码之外还必须包括一个 `moon.pkg.json`。如果你不清楚如何编写这个文件,可以考虑使用 [moon](https://github.com/moonbitlang/moon) 来初始化。 @@ -70,7 +70,9 @@ moon bundle --source-dir $core 现在你可以使用这些命令来编译 `.mbt` 文件: ```bash -bundled=$core/target/$target/release/bundle +# 即使 $target 是 `riscv`,也依然可以使用这个路径。 +# 这是因为 bundle 文件夹的内容是中间表示 (IR),它和编译目标无关。 +bundled=$core/target/wasm-gc/release/bundle # 这里 main.mbt 是一个含有 `fn main` 的文件。 moonc build-package $src/main.mbt -is-main -std-path $core/target/$bundled -o $obj -target $target diff --git a/src/riscv_opt.ml b/src/riscv_opt.ml new file mode 100644 index 0000000..150b7ae --- /dev/null +++ b/src/riscv_opt.ml @@ -0,0 +1,176 @@ +(** Does all sorts of optimizations. *) + +(** Instruction in SSA form; feel free to change it to anything you'd like *) +type instruction = Riscv_ssa.t + +type basic_block = { + body: instruction Basic_vec.t; + succ: string Basic_vec.t; + pred: string Basic_vec.t; +} + +let make () = + { + body = Basic_vec.empty (); + succ = Basic_vec.empty (); + pred = Basic_vec.empty (); + } + + +(** We use the name of a basic block to refer to it. *) +let basic_blocks = Hashtbl.create 1024 + +(** The exit block(s) for each function `fn`, i.e. whose final instruction is `return`. *) +let exit_fn = Hashtbl.create 256 + +(** Get the basic block with label `name`. *) +let block_of name = Hashtbl.find basic_blocks name + +(** +Builds control flow graph. + +Does not return anything; +stores all information in `basic_block`. +*) +let build_cfg fn body = + (* Identify all basic blocks *) + + (* The first basic block in each function is unnamed, *) + (* so we take the function name as its name. *) + let name = ref fn in + let vec = ref (Basic_vec.make ~dummy:Riscv_ssa.Nop 16) in + + let separate_basic_block (inst: instruction) = + (match inst with + | Label label -> + Hashtbl.add basic_blocks !name (make ()); + Basic_vec.append (block_of !name).body (!vec); + (* Clear the instructions; Basic_vec does not offer clear() or something alike *) + vec := Basic_vec.make ~dummy:Riscv_ssa.Nop 16; + name := label + + | x -> Basic_vec.push !vec x) + in + List.iter separate_basic_block body; + + (* The last basic block is missed by `separate_basic_block` *) + (* Manually add it *) + Hashtbl.add basic_blocks !name (make ()); + Basic_vec.append (block_of !name).body (!vec); + + Hashtbl.add exit_fn fn (Basic_vec.empty ()); + + (* Find successors of each block. *) + + (* From the generation of SSA, *) + (* it is guaranteed that the structure of basic block is preserved; *) + (* i.e. only the last instruction can be jump/branch/return. *) + (* So we just look at them. *) + let rec find_succ name = + if not (Hashtbl.mem basic_blocks name) then + let block = block_of name in + let successors = + (match Basic_vec.last block.body with + | Jump target -> [target] + | Branch { ifso; ifnot } -> [ifso; ifnot] + | Return _ -> Basic_vec.push (Hashtbl.find exit_fn fn) name; [] + | _ -> failwith "riscv_opt.ml: malformed SSA") + in + Basic_vec.append block.succ (Basic_vec.of_list successors); + List.iter find_succ successors + in + find_succ fn; + + (* Find predecessors *) + Hashtbl.iter (fun name block -> + Basic_vec.iter block.succ (fun succ -> Basic_vec.push (block_of succ).pred name) + ) basic_blocks + +let visit_fn f ssa = + let visit (toplevel: instruction) = + match toplevel with + | FnDecl { fn; body; _ } -> f fn body + | _ -> () + in + List.iter visit ssa + +let map_fn f ssa = + let map_aux (toplevel: instruction) = + match toplevel with + | FnDecl { fn; body; args; } -> Riscv_ssa.FnDecl { fn; body = f fn; args } + | x -> x + in + List.map map_aux ssa + +(** Sets to store live variables. *) +module Varset = Set.Make(String) + +(** +Liveness analysis. + +Takes the entry block of a function, and returns a hash table: +for each basic block in this function, +this hash table gives all variables alive at the exit of it. +*) +let liveness_analysis fn = + let live_in = Hashtbl.create 1024 in + let live_out = Hashtbl.create 1024 in + + (* Find all basic blocks in the function `fn` *) + let blocks = Basic_vec.make ~dummy:"" 32 in + let visited = ref Varset.empty in + let rec get_blocks x = + if not (Varset.mem x !visited) then + Basic_vec.push blocks x; + visited := Varset.add x !visited; + Basic_vec.iter (block_of x).succ get_blocks + in + get_blocks fn; + let blocks = Basic_vec.to_list blocks in + + (* Initialize live_in and live_out to empty *) + List.iter (fun name -> + Hashtbl.add live_in name Varset.empty; + Hashtbl.add live_out name Varset.empty + ) blocks; + + (* Keep doing until reaches fixed point *) + let rec iterate worklist = + let last_item = Basic_vec.pop_no_compact worklist in + match last_item with + | None -> () + | Some fn -> + List.iter (fun name -> + let block = block_of name in + let old_live_in = Hashtbl.find live_in name in + + (* Update live_out *) + (* It should be the union of live_in of all successors *) + let new_live_out = + List.fold_left (fun x succ_name -> + Varset.union x (Hashtbl.find live_in succ_name) + ) Varset.empty (Basic_vec.to_list block.succ) + in + + Hashtbl.replace live_out name new_live_out; + + (* Re-calculate live-in *) + let body = Basic_vec.to_list block.body in + let def_var = List.concat_map Riscv_ssa.def body in + let use_var = List.concat_map Riscv_ssa.use body in + let def = List.map (fun (x: Riscv_ssa.var) -> x.name) def_var |> Varset.of_list in + let use = List.map (fun (x: Riscv_ssa.var) -> x.name) use_var |> Varset.of_list in + let new_live_in = Varset.union use (Varset.diff new_live_out def) in + + (* If live-in has changed, then all predecessors are subject to change; *) + (* Push all of them into worklist *) + if not (Varset.equal old_live_in new_live_in) then + Hashtbl.replace live_in name new_live_in; + Basic_vec.append worklist block.pred; + + iterate worklist + ) blocks; + in + iterate (Hashtbl.find exit_fn fn); + + live_out \ No newline at end of file diff --git a/src/riscv_ssa.ml b/src/riscv_ssa.ml index 84cd502..59cece4 100644 --- a/src/riscv_ssa.ml +++ b/src/riscv_ssa.ml @@ -11,6 +11,7 @@ type var = { We store all discarded values (e.g. unit) into variable of this name. *) let discard = "_" +let unit = { name = discard; ty = Mtype.T_unit } let to_string (r: var) = Printf.sprintf "%s: %s" r.name (Mtype.to_string r.ty) @@ -325,88 +326,64 @@ let rec sizeof ty = | _ -> failwith "riscv_ssa.ml: cannot calculate size" -(** The variable defined in the instruction. *) -let def t = match t with -| Add { rd; _ } -> [rd] -| Sub { rd; _ } -> [rd] -| Mul { rd; _ } -> [rd] -| Div { rd; _ } -> [rd] -| Mod { rd; _ } -> [rd] -| Less { rd; _ } -> [rd] -| Leq { rd; _ } -> [rd] -| Great { rd; _ } -> [rd] -| Geq { rd; _ } -> [rd] -| Eq { rd; _ } -> [rd] -| Neq { rd; _ } -> [rd] -| Neg { rd; _ } -> [rd] -| FAdd { rd; _ } -> [rd] -| FSub { rd; _ } -> [rd] -| FMul { rd; _ } -> [rd] -| FDiv { rd; _ } -> [rd] -| FLess { rd; _ } -> [rd] -| FLeq { rd; _ } -> [rd] -| FGreat { rd; _ } -> [rd] -| FGeq { rd; _ } -> [rd] -| FEq { rd; _ } -> [rd] -| FNeq { rd; _ } -> [rd] -| FNeg { rd; _ } -> [rd] -| Call { rd; _ } -> [rd] -| AssignInt { rd; _ } -> [rd] -| AssignFP { rd; _ } -> [rd] -| AssignStr { rd; _ } -> [rd] -| Assign { rd; _ } -> [rd] -| Load { rd; _ } -> [rd] -| Store _ -> [] -| Jump _ -> [] -| Branch _ -> [] -| Label _ -> [] -| Phi { rd; _ } -> [rd] -| FnDecl _ -> [] -| Malloc { rd; _ } -> [rd] -| Return _ -> [] -| Nop -> [] +(** Maps all result registers with `fd` and all operands with `fs`. *) +let rec reg_map fd fs t = match t with +| Add { rd; rs1; rs2; } -> Add { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Sub { rd; rs1; rs2; } -> Sub { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Mul { rd; rs1; rs2; } -> Mul { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Div { rd; rs1; rs2; } -> Div { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Mod { rd; rs1; rs2; } -> Mod { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Less { rd; rs1; rs2; } -> Less { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Leq { rd; rs1; rs2; } -> Leq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Great { rd; rs1; rs2; } -> Great { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Geq { rd; rs1; rs2; } -> Geq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Eq { rd; rs1; rs2; } -> Eq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Neq { rd; rs1; rs2; } -> Neq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| Neg { rd; rs1 } -> Neg { rd = fd rd; rs1 = fs rs1 } +| FAdd { rd; rs1; rs2; } -> FAdd { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FSub { rd; rs1; rs2; } -> FSub { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FMul { rd; rs1; rs2; } -> FMul { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FDiv { rd; rs1; rs2; } -> FDiv { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FLess { rd; rs1; rs2; } -> FLess { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FLeq { rd; rs1; rs2; } -> FLeq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FGreat { rd; rs1; rs2; } -> FGreat { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FGeq { rd; rs1; rs2; } -> FGeq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FEq { rd; rs1; rs2; } -> FEq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FNeq { rd; rs1; rs2; } -> FNeq { rd = fd rd; rs1 = fs rs1; rs2 = fs rs2 } +| FNeg { rd; rs1 } -> FNeg { rd = fd rd; rs1 = fs rs1 } +| Call { rd; fn; args } -> Call { rd = fd rd; fn; args = List.map fs args } +| AssignInt { rd; imm; size; signed } -> AssignInt { rd = fd rd; imm; size; signed } +| AssignFP { rd; imm; size } -> AssignFP { rd = fd rd; imm; size; } +| AssignStr { rd; imm } -> AssignStr { rd = fd rd; imm; } +| Assign { rd; rs } -> Assign { rd = fd rd; rs = fs rs } +| Load { rd; rs; offset } -> Load { rd = fd rd; rs = fs rs; offset } +| Store { rd; rs; offset } -> Load { rd = fs rd; rs = fs rs; offset } +| Jump label -> Jump label +| Branch { cond; ifso; ifnot } -> Branch { cond = fs cond; ifso; ifnot } +| Label label -> Label label +| Phi { rd; rs } -> Phi { rd = fd rd; rs = List.map (fun (x, name) -> (fs x, name)) rs } +| FnDecl { fn; args; body } -> FnDecl { fn; args; body = List.map (fun x -> reg_map fd fs x) body } +| Malloc { rd; size } -> Malloc { rd = fd rd; size } +| Return var -> Return (fs var) +| Nop -> Nop (** Variables that has been accessed in this instruction. *) -let use t = match t with -| Add { rs1; rs2; _ } -> [rs1; rs2] -| Sub { rs1; rs2; _ } -> [rs1; rs2] -| Mul { rs1; rs2; _ } -> [rs1; rs2] -| Div { rs1; rs2; _ } -> [rs1; rs2] -| Mod { rs1; rs2; _ } -> [rs1; rs2] -| Less { rs1; rs2; _ } -> [rs1; rs2] -| Leq { rs1; rs2; _ } -> [rs1; rs2] -| Great { rs1; rs2; _ } -> [rs1; rs2] -| Geq { rs1; rs2; _ } -> [rs1; rs2] -| Eq { rs1; rs2; _ } -> [rs1; rs2] -| Neq { rs1; rs2; _ } -> [rs1; rs2] -| Neg { rs1; _ } -> [rs1] -| FAdd { rs1; rs2; _ } -> [rs1; rs2] -| FSub { rs1; rs2; _ } -> [rs1; rs2] -| FMul { rs1; rs2; _ } -> [rs1; rs2] -| FDiv { rs1; rs2; _ } -> [rs1; rs2] -| FLess { rs1; rs2; _ } -> [rs1; rs2] -| FLeq { rs1; rs2; _ } -> [rs1; rs2] -| FGreat { rs1; rs2; _ } -> [rs1; rs2] -| FGeq { rs1; rs2; _ } -> [rs1; rs2] -| FEq { rs1; rs2; _ } -> [rs1; rs2] -| FNeq { rs1; rs2; _ } -> [rs1; rs2] -| FNeg { rs1; _ } -> [rs1] -| Call { args; _ } -> args -| AssignInt _ -> [] -| AssignFP _ -> [] -| AssignStr _ -> [] -| Assign { rs; _ } -> [rs] -| Load { rs; _ } -> [rs] -| Store { rd; rs; _ } -> [rd; rs] -| Jump _ -> [] -| Branch { cond; } -> [cond] -| Label _ -> [] -| Phi { rs } -> List.map (fun (var, label) -> var) rs -| FnDecl _ -> [] -| Malloc _ -> [] -| Return var -> [var] -| Nop -> [] +let use t = + (* Special care for phi. We'll take care of it in riscv_opt.ml. *) + match t with + | Phi _ -> [] + | _ -> + let result = ref [] in + let fs = (fun x -> result := x :: !result; unit) in + reg_map (fun _ -> unit) fs t |> ignore; + !result +(** The variable defined in the instruction. *) +let def t = + let result = ref [] in + let fd = (fun x -> result := x :: !result; unit) in + reg_map fd (fun _ -> unit) t |> ignore; + !result (** Push the correct sequence of instruction based on primitives. *) let deal_with_prim ssa rd (prim: Primitive.prim) args = @@ -493,7 +470,7 @@ It returns the variable in which the result of the last instruction pushed is st let rec do_convert ssa (expr: Mcore.expr) = match expr with | Cexpr_unit _ -> - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_var { id; ty; prim; _ } -> warn prim; @@ -584,7 +561,7 @@ let rec do_convert ssa (expr: Mcore.expr) = let offset = offsetof name pos in Basic_vec.push ssa (Store { rd; rs; offset; }); - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_if { cond; ifso; ifnot; ty; _ } -> let rd = new_temp ty in @@ -597,7 +574,7 @@ let rec do_convert ssa (expr: Mcore.expr) = let ifnot_ssa = Basic_vec.make ~dummy:Nop 20 in let ifnot_result = (match ifnot with - | None -> { name = discard; ty = Mtype.T_unit } + | None -> unit | Some x -> do_convert ifnot_ssa x ) in @@ -661,7 +638,7 @@ let rec do_convert ssa (expr: Mcore.expr) = A good thing is that loops don't return a value. We don't need to insert φ after the label `exit`. *) - | Cexpr_loop { params; body; args; label; ty } -> + | Cexpr_loop { params; body; args; label; _ } -> (* We need to use the global variable `conts`. *) (* In case there's an outer loop, we might have tampered it; *) (* So we must store the contents somewhere. *) @@ -714,8 +691,7 @@ let rec do_convert ssa (expr: Mcore.expr) = (* Store `conts` back; let outer loop go on normally. *) conts := old_conts; - - { name = discard; ty = Mtype.T_unit } + unit (* See the explanation for Cexpr_loop. *) | Cexpr_continue { args; label } -> @@ -732,15 +708,14 @@ let rec do_convert ssa (expr: Mcore.expr) = (* Jump back to the beginning of the loop. *) let loop_name = Printf.sprintf "%s_%d" label.name label.stamp in Basic_vec.push ssa (Jump loop_name); - - { name = discard; ty = Mtype.T_unit } + unit (* Assigns mutable variables. *) | Cexpr_assign { var; expr; ty } -> let rd = do_convert ssa expr in let rs = { name = Ident.to_string var; ty = Mtype.T_bytes} in Basic_vec.push ssa (Store { rd; rs; offset = 0 }); - { name = discard; ty = Mtype.T_unit } + unit (* Builds a record type. *) | Cexpr_record { fields; ty; } -> @@ -767,51 +742,51 @@ let rec do_convert ssa (expr: Mcore.expr) = | Cexpr_break _ -> prerr_endline "break"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_return _ -> prerr_endline "return"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_letfn _ -> prerr_endline "letfn"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_function _ -> prerr_endline "function"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_constr _ -> prerr_endline "constr"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_letrec _ -> prerr_endline "letrec"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_tuple _ -> prerr_endline "tuple"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_record_update _ -> prerr_endline "record_update"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_switch_constr _ -> prerr_endline "switch constr"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_switch_constant _ -> prerr_endline "switch constant"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_handle_error _ -> prerr_endline "handle error"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_array _ -> prerr_endline "array"; - { name = discard; ty = Mtype.T_unit } + unit | Cexpr_const { c; ty; _ } -> let rd = new_temp ty in @@ -852,7 +827,7 @@ let convert_expr (expr: Mcore.expr) = Basic_vec.push ssa (Return return); Basic_vec.map_into_list ssa (fun x -> x) -(** We will only do this with *) + let convert_toplevel (top: Mcore.top_item) = match top with | Ctop_fn { binder; func; export_info_; _ } -> @@ -871,7 +846,7 @@ let convert_toplevel (top: Mcore.top_item) = (* No need to deal with stubs. They are just declarations of builtin functions, which we don't care - - since they don't carry anything about implementation. + since they don't carry any information about implementation. *) | Ctop_stub _ -> [] From 46ad0774539f906168792be84ceaaf121950bca8 Mon Sep 17 00:00:00 2001 From: AdUhTkJm <2292398666@qq.com> Date: Sun, 22 Dec 2024 18:43:31 +0800 Subject: [PATCH 7/7] Remove multiple jumps at the end of basic block --- src/basic_vec.ml | 13 +++++++- src/driver_util.ml | 1 + src/riscv_opt.ml | 54 +++++++++++++++++++++++++++++---- src/riscv_ssa.ml | 75 ++++++++++++++++++++++++++++++++++++---------- 4 files changed, 122 insertions(+), 21 deletions(-) diff --git a/src/basic_vec.ml b/src/basic_vec.ml index 9461174..6c7bd6f 100644 --- a/src/basic_vec.ml +++ b/src/basic_vec.ml @@ -188,7 +188,7 @@ let insert (d : 'a t) idx elt = d.arr.(idx) <- elt; d.len <- d.len + 1 -let pop_no_compact (d : 'a t) : 'a option = +let pop_opt (d : 'a t) : 'a option = let d_len = d.len in if d_len = 0 then None else @@ -198,3 +198,14 @@ let pop_no_compact (d : 'a t) : 'a option = fill_with_junk_ d_arr last_index 1; d.len <- last_index; Some last + +let pop (d : 'a t) : 'a = + let d_len = d.len in + if d_len = 0 then failwith __FUNCTION__ + else + let d_arr = d.arr in + let last_index = d_len - 1 in + let last = d_arr.!(last_index) in + fill_with_junk_ d_arr last_index 1; + d.len <- last_index; + last \ No newline at end of file diff --git a/src/driver_util.ml b/src/driver_util.ml index 3e828d2..386781a 100644 --- a/src/driver_util.ml +++ b/src/driver_util.ml @@ -245,6 +245,7 @@ let wasm_gen ~(elim_unused_let : bool) (core : Mcore.t) ~clam_callback = let riscv_gen (core : Mcore.t) = core |> Riscv_ssa.ssa_of_mcore + |> Riscv_opt.opt |> Riscv.generate let link_core ~(shrink_wasm : bool) ~(elim_unused_let : bool) diff --git a/src/riscv_opt.ml b/src/riscv_opt.ml index 150b7ae..2ecd64a 100644 --- a/src/riscv_opt.ml +++ b/src/riscv_opt.ml @@ -3,6 +3,7 @@ (** Instruction in SSA form; feel free to change it to anything you'd like *) type instruction = Riscv_ssa.t +(** Note: `body` does not include the label before instructions. *) type basic_block = { body: instruction Basic_vec.t; succ: string Basic_vec.t; @@ -40,11 +41,31 @@ let build_cfg fn body = let name = ref fn in let vec = ref (Basic_vec.make ~dummy:Riscv_ssa.Nop 16) in + (* There might be multiple jumps at end of each basic block. *) + (* Clean them up. *) + let tidy (vec: instruction Basic_vec.t) = + let rec iter () = + let len = Basic_vec.length vec in + if len <= 1 then () + + (* Check penultimate instruction, and pop the last according to it *) + else let x = Basic_vec.get vec (len - 2) in + match x with + | Jump _ -> Basic_vec.pop vec |> ignore; iter () + | Branch _ -> Basic_vec.pop vec |> ignore; iter () + | Return _ -> Basic_vec.pop vec |> ignore; iter () + | _ -> () + in + iter (); + vec + in + let separate_basic_block (inst: instruction) = (match inst with | Label label -> Hashtbl.add basic_blocks !name (make ()); - Basic_vec.append (block_of !name).body (!vec); + Basic_vec.append (block_of !name).body (tidy !vec); + (* Clear the instructions; Basic_vec does not offer clear() or something alike *) vec := Basic_vec.make ~dummy:Riscv_ssa.Nop 16; name := label @@ -67,8 +88,8 @@ let build_cfg fn body = (* i.e. only the last instruction can be jump/branch/return. *) (* So we just look at them. *) let rec find_succ name = - if not (Hashtbl.mem basic_blocks name) then - let block = block_of name in + let block = block_of name in + if Basic_vec.is_empty block.succ then let successors = (match Basic_vec.last block.body with | Jump target -> [target] @@ -136,7 +157,7 @@ let liveness_analysis fn = (* Keep doing until reaches fixed point *) let rec iterate worklist = - let last_item = Basic_vec.pop_no_compact worklist in + let last_item = Basic_vec.pop_opt worklist in match last_item with | None -> () | Some fn -> @@ -173,4 +194,27 @@ let liveness_analysis fn = in iterate (Hashtbl.find exit_fn fn); - live_out \ No newline at end of file + live_out + +let ssa_of_cfg fn = + let inst = Basic_vec.empty () in + let visited = ref Varset.empty in + let rec get_blocks x = + if not (Varset.mem x !visited) then + let block = block_of x in + + (* Body does not contain labels; *) + (* Fill it in here *) + Basic_vec.push inst (Riscv_ssa.Label x); + Basic_vec.append inst block.body; + visited := Varset.add x !visited; + Basic_vec.iter block.succ get_blocks + in + get_blocks fn; + inst |> Basic_vec.to_list + +let opt ssa = + visit_fn build_cfg ssa; + let s = map_fn ssa_of_cfg ssa in + Basic_io.write "core.ssa" (String.concat "\n" (List.map Riscv_ssa.to_string s)); + s \ No newline at end of file diff --git a/src/riscv_ssa.ml b/src/riscv_ssa.ml index 59cece4..4fa4503 100644 --- a/src/riscv_ssa.ml +++ b/src/riscv_ssa.ml @@ -369,14 +369,10 @@ let rec reg_map fd fs t = match t with (** Variables that has been accessed in this instruction. *) let use t = - (* Special care for phi. We'll take care of it in riscv_opt.ml. *) - match t with - | Phi _ -> [] - | _ -> - let result = ref [] in - let fs = (fun x -> result := x :: !result; unit) in - reg_map (fun _ -> unit) fs t |> ignore; - !result + let result = ref [] in + let fs = (fun x -> result := x :: !result; unit) in + reg_map (fun _ -> unit) fs t |> ignore; + !result (** The variable defined in the instruction. *) let def t = @@ -426,6 +422,8 @@ let deal_with_prim ssa rd (prim: Primitive.prim) args = | true, Neg, [rs1] -> (FNeg { rd; rs1 }) | _ -> die ()) in Basic_vec.push ssa op + + | Pignore -> () | _ -> Basic_vec.push ssa (Call { rd; fn = (Primitive.sexp_of_prim prim |> S.to_string); args }) @@ -495,10 +493,56 @@ let rec do_convert ssa (expr: Mcore.expr) = (* We tidy some of these up, and compile others into functions. *) | Cexpr_prim { prim; args; ty; _ } -> let rd = new_temp ty in - let args = List.map (fun expr -> do_convert ssa expr) args in - (* TODO: take special care with Psequand and Psequor. *) - (* They are short-circuited and should be compiled into if-else. *) - deal_with_prim ssa rd prim args; + (match prim, args with + | Psequand, [rs1; rs2] -> + (* Short circuiting, compile into if-else *) + (* rd = rs1 && rs2 -> rd = if (rs1) rs2 else false *) + let ifso = new_label "sequand_if_" in + let ifnot = new_label "sequand_else_" in + let ifexit = new_label "sequand_exit_" in + let t1 = new_temp Mtype.T_bool in + let t2 = new_temp Mtype.T_bool in + let cond = do_convert ssa rs1 in + Basic_vec.push ssa (Branch { cond; ifso; ifnot }); + + Basic_vec.push ssa (Label ifso); + let rs = do_convert ssa rs2 in + Basic_vec.push ssa (Assign { rd = t1; rs }); + Basic_vec.push ssa (Jump ifexit); + + Basic_vec.push ssa (Label ifnot); + Basic_vec.push ssa (AssignInt { rd = t2; imm = 0L; size = Bit8; signed = true }); + Basic_vec.push ssa (Jump ifexit); + + Basic_vec.push ssa (Label ifexit); + Basic_vec.push ssa (Phi { rd; rs = [(t1, ifso); (t2, ifnot) ]}) + + | Psequor, [rs1; rs2] -> + (* Short circuiting, compile into if-else *) + (* rd = rs1 || rs2 -> rd = if (rs1) true else rs2 *) + let ifso = new_label "sequor_if_" in + let ifnot = new_label "sequor_else_" in + let ifexit = new_label "sequor_exit_" in + let t1 = new_temp Mtype.T_bool in + let t2 = new_temp Mtype.T_bool in + let cond = do_convert ssa rs1 in + Basic_vec.push ssa (Branch { cond; ifso; ifnot }); + + Basic_vec.push ssa (Label ifso); + Basic_vec.push ssa (AssignInt { rd = t1; imm = 1L; size = Bit8; signed = true }); + Basic_vec.push ssa (Jump ifexit); + + Basic_vec.push ssa (Label ifnot); + let rs = do_convert ssa rs2 in + Basic_vec.push ssa (Assign { rd = t2; rs }); + Basic_vec.push ssa (Jump ifexit); + + Basic_vec.push ssa (Label ifexit); + Basic_vec.push ssa (Phi { rd; rs = [(t1, ifso); (t2, ifnot) ]}) + + | _ -> + let args = List.map (fun expr -> do_convert ssa expr) args in + deal_with_prim ssa rd prim args); rd | Cexpr_let { name; rhs; body; _ } -> @@ -740,8 +784,10 @@ let rec do_convert ssa (expr: Mcore.expr) = List.iter visit fields; rd - | Cexpr_break _ -> - prerr_endline "break"; + | Cexpr_break { label; _ } -> + (* Jumps to exit of the loop. *) + let loop_name = Printf.sprintf "%s_%d" label.name label.stamp in + Basic_vec.push ssa (Jump ("exit_" ^ loop_name)); unit | Cexpr_return _ -> @@ -869,5 +915,4 @@ let ssa_of_mcore (core: Mcore.t) = | None -> body in - Basic_io.write "core.ssa" (String.concat "\n" (List.map to_string with_main)); with_main