diff --git a/crates/noirc_frontend/src/hir/def_collector/dc_mod.rs b/crates/noirc_frontend/src/hir/def_collector/dc_mod.rs index fb0c797d0cb..9433cd1519a 100644 --- a/crates/noirc_frontend/src/hir/def_collector/dc_mod.rs +++ b/crates/noirc_frontend/src/hir/def_collector/dc_mod.rs @@ -21,7 +21,9 @@ struct ModCollector<'a> { pub(crate) module_id: LocalModuleId, } -/// Walk a module and collect it's definitions +/// Walk a module and collect its definitions. +/// +/// This performs the entirety of the definition collection phase of the name resolution pass. pub fn collect_defs( def_collector: &mut DefCollector, ast: ParsedModule, diff --git a/crates/noirc_frontend/src/hir/def_collector/mod.rs b/crates/noirc_frontend/src/hir/def_collector/mod.rs index 5608f9ed118..26f360642ef 100644 --- a/crates/noirc_frontend/src/hir/def_collector/mod.rs +++ b/crates/noirc_frontend/src/hir/def_collector/mod.rs @@ -1,3 +1,22 @@ +//! This set of modules implements the name resolution pass which converts the AST into +//! the HIR. In comparison to the AST, the HIR is interned in the NodeInterner and attaches +//! DefinitionIds to each Variable AST node to link them up to their definition. In doing so, +//! this pass validates scoping requirements and is responsible for issuing name/definition +//! related errors including missing definitions and duplicate definitions. One aspect of +//! handling definition links is handling and tracking imports. Another result of this pass +//! is that all imports will be removed from the program, and the AST of each file will +//! be combined into a larger Hir stored in the NodeInterner and linked together via DefinitionIds. +//! +//! The pass is comprised of two parts. The first part - definition collection - is implemented +//! in dc_mod::collect_defs and is responsible for collecting all the public symbols used by +//! the program so that we can resolve them later without worrying about cyclic references or +//! variables that aren't defined yet. +//! +//! The second part of the pass is the actual name resolution. Name resolution is handled by +//! super::resolution::Resolvers which traverse the entirety of a definition, ensure all names +//! are defined and linked, and convert the definition into Hir. +//! +//! These passes are performed sequentially (along with type checking afterward) in dc_crate. pub mod dc_crate; pub mod dc_mod; mod errors; diff --git a/crates/noirc_frontend/src/hir/def_map/mod.rs b/crates/noirc_frontend/src/hir/def_map/mod.rs index 2fb161e20b3..be2a9feeb5f 100644 --- a/crates/noirc_frontend/src/hir/def_map/mod.rs +++ b/crates/noirc_frontend/src/hir/def_map/mod.rs @@ -17,6 +17,7 @@ mod module_data; pub use module_data::*; mod namespace; pub use namespace::*; + // XXX: Ultimately, we want to constrain an index to be of a certain type just like in RA /// Lets first check if this is offered by any external crate /// XXX: RA has made this a crate on crates.io @@ -35,6 +36,9 @@ pub struct ModuleId { pub local_id: LocalModuleId, } +/// Map of all modules and scopes defined within a crate. +/// +/// The definitions of the crate are accessible indirectly via the scopes of each module. #[derive(Debug)] pub struct CrateDefMap { pub(crate) root: LocalModuleId, diff --git a/crates/noirc_frontend/src/hir/def_map/module_data.rs b/crates/noirc_frontend/src/hir/def_map/module_data.rs index 30f73740555..8f62f7581b1 100644 --- a/crates/noirc_frontend/src/hir/def_map/module_data.rs +++ b/crates/noirc_frontend/src/hir/def_map/module_data.rs @@ -6,6 +6,8 @@ use crate::Ident; use super::{ItemScope, LocalModuleId}; +/// Contains the actual contents of a module: its parent (if one exists), +/// children, and scope with all definitions defined within the scope. #[derive(Default, Debug, PartialEq, Eq)] pub struct ModuleData { pub parent: Option, diff --git a/crates/noirc_frontend/src/hir/def_map/module_def.rs b/crates/noirc_frontend/src/hir/def_map/module_def.rs index 881e947fad8..399ee15700c 100644 --- a/crates/noirc_frontend/src/hir/def_map/module_def.rs +++ b/crates/noirc_frontend/src/hir/def_map/module_def.rs @@ -2,6 +2,7 @@ use crate::node_interner::{FuncId, StmtId, StructId}; use super::ModuleId; +/// A generic ID that references either a module, function, type, or global #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum ModuleDefId { ModuleId(ModuleId), diff --git a/crates/noirc_frontend/src/hir/resolution/mod.rs b/crates/noirc_frontend/src/hir/resolution/mod.rs index 63601c33dd0..32d6e0b07bd 100644 --- a/crates/noirc_frontend/src/hir/resolution/mod.rs +++ b/crates/noirc_frontend/src/hir/resolution/mod.rs @@ -1,3 +1,10 @@ +//! This set of modules implements the second half of the name resolution pass. +//! After all definitions are collected by def_collector, resovler::Resolvers are +//! created to resolve all names within a definition. In this context 'resolving' +//! a name means validating that it has a valid definition, and that it was not +//! redefined multiple times in the same scope. Once this is validated, it is linked +//! to that definition via a matching DefinitionId. All references to the same definition +//! will have the same DefinitionId. pub mod errors; pub mod import; pub mod path_resolver; diff --git a/crates/noirc_frontend/src/hir/resolution/resolver.rs b/crates/noirc_frontend/src/hir/resolution/resolver.rs index 554c15fe208..6b2b095cc64 100644 --- a/crates/noirc_frontend/src/hir/resolution/resolver.rs +++ b/crates/noirc_frontend/src/hir/resolution/resolver.rs @@ -11,12 +11,6 @@ // XXX: Change mentions of intern to resolve. In regards to the above comment // // XXX: Resolver does not check for unused functions -#[derive(Debug, PartialEq, Eq)] -struct ResolverMeta { - num_times_used: usize, - ident: HirIdent, -} - use crate::hir_def::expr::{ HirBinaryOp, HirBlockExpression, HirCallExpression, HirCastExpression, HirConstructorExpression, HirExpression, HirForExpression, HirIdent, HirIfExpression, @@ -63,6 +57,12 @@ type Scope = GenericScope; type ScopeTree = GenericScopeTree; type ScopeForest = GenericScopeForest; +/// The primary jobs of the Resolver are to validate that every variable found refers to exactly 1 +/// definition in scope, and to convert the AST into the HIR. +/// +/// A Resolver is a short-lived struct created to resolve a top-level definition. +/// One of these is created for each function definition and struct definition. +/// This isn't strictly necessary to its function, it could be refactored out in the future. pub struct Resolver<'a> { scopes: ScopeForest, path_resolver: &'a dyn PathResolver, @@ -88,6 +88,13 @@ pub struct Resolver<'a> { lambda_index: usize, } +/// ResolverMetas are tagged onto each definition to track how many times they are used +#[derive(Debug, PartialEq, Eq)] +struct ResolverMeta { + num_times_used: usize, + ident: HirIdent, +} + impl<'a> Resolver<'a> { pub fn new( interner: &'a mut NodeInterner, diff --git a/crates/noirc_frontend/src/hir_def/expr.rs b/crates/noirc_frontend/src/hir_def/expr.rs index 5ad5a3a6fea..a34fbe73721 100644 --- a/crates/noirc_frontend/src/hir_def/expr.rs +++ b/crates/noirc_frontend/src/hir_def/expr.rs @@ -8,6 +8,11 @@ use crate::{BinaryOp, BinaryOpKind, Ident, Shared, UnaryOp}; use super::stmt::HirPattern; use super::types::{StructType, Type}; +/// A HirExpression is the result of an Expression in the AST undergoing +/// name resolution. It is almost identical to the Expression AST node, but +/// references other HIR nodes indirectly via IDs rather than directly via +/// boxing. Variables in HirExpressions are tagged with their DefinitionId +/// from the definition that refers to them so there is no ambiguity with names. #[derive(Debug, Clone)] pub enum HirExpression { Ident(HirIdent), @@ -35,6 +40,7 @@ impl HirExpression { } } +/// Corresponds to a variable in the source code #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct HirIdent { pub location: Location, @@ -89,6 +95,8 @@ pub struct HirInfixExpression { pub rhs: ExprId, } +/// This is always a struct field access `mystruct.field` +/// and never a method call. The later is represented by HirMethodCallExpression. #[derive(Debug, Clone)] pub struct HirMemberAccess { pub lhs: ExprId, @@ -104,6 +112,7 @@ pub struct HirIfExpression { pub alternative: Option, } +// `lhs as type` in the source code #[derive(Debug, Clone)] pub struct HirCastExpression { pub lhs: ExprId, @@ -160,6 +169,7 @@ pub struct HirConstructorExpression { pub fields: Vec<(Ident, ExprId)>, } +/// Indexing, as in `array[index]` #[derive(Debug, Clone)] pub struct HirIndexExpression { pub collection: ExprId, diff --git a/crates/noirc_frontend/src/hir_def/function.rs b/crates/noirc_frontend/src/hir_def/function.rs index 1f80cd09df8..644bf4e0a41 100644 --- a/crates/noirc_frontend/src/hir_def/function.rs +++ b/crates/noirc_frontend/src/hir_def/function.rs @@ -115,14 +115,22 @@ impl From> for Parameters { } } +/// A FuncMeta contains the signature of the function and any associated meta data like +/// the function's Location, FunctionKind, and attributes. If the function's body is +/// needed, it can be retrieved separately via `NodeInterner::function(&self, &FuncId)`. #[derive(Debug, Clone)] pub struct FuncMeta { pub name: HirIdent, pub kind: FunctionKind, + /// A function's attributes are the `#[...]` items above the function + /// definition, if any. Currently, this is limited to a maximum of only one + /// Attribute per function. pub attributes: Option, + pub parameters: Parameters, + pub return_visibility: AbiVisibility, /// The type of this function. Either a Type::Function diff --git a/crates/noirc_frontend/src/hir_def/stmt.rs b/crates/noirc_frontend/src/hir_def/stmt.rs index 278126d224d..04a6d9770fa 100644 --- a/crates/noirc_frontend/src/hir_def/stmt.rs +++ b/crates/noirc_frontend/src/hir_def/stmt.rs @@ -4,6 +4,20 @@ use crate::{Ident, Type}; use fm::FileId; use noirc_errors::Span; +/// A HirStatement is the result of performing name resolution on +/// the Statement AST node. Unlike the AST node, any nested nodes +/// are referred to indirectly via ExprId or StmtId, which can be +/// used to retrieve the relevant node via the NodeInterner. +#[derive(Debug, Clone)] +pub enum HirStatement { + Let(HirLetStatement), + Constrain(HirConstrainStatement), + Assign(HirAssignStatement), + Expression(ExprId), + Semi(ExprId), + Error, +} + #[derive(Debug, Clone)] pub struct HirLetStatement { pub pattern: HirPattern, @@ -20,32 +34,20 @@ impl HirLetStatement { } } +/// Corresponds to `lvalue = expression;` in the source code #[derive(Debug, Clone)] pub struct HirAssignStatement { pub lvalue: HirLValue, pub expression: ExprId, } +/// Corresponds to `constrain expr;` in the source code. +/// This node also contains the FileId of the file the constrain +/// originates from. This is used later in the SSA pass to issue +/// an error if a constrain is found to be always false. #[derive(Debug, Clone)] pub struct HirConstrainStatement(pub ExprId, pub FileId); -#[derive(Debug, Clone)] -pub struct BinaryStatement { - pub lhs: ExprId, - pub r#type: Type, - pub expression: ExprId, -} - -#[derive(Debug, Clone)] -pub enum HirStatement { - Let(HirLetStatement), - Constrain(HirConstrainStatement), - Assign(HirAssignStatement), - Expression(ExprId), - Semi(ExprId), - Error, -} - #[derive(Debug, Clone)] pub enum HirPattern { Identifier(HirIdent), @@ -79,7 +81,8 @@ impl HirPattern { } } -/// Represents an Ast form that can be assigned to +/// Represents an Ast form that can be assigned to. These +/// can be found on the left hand side of an assignment `=`. #[derive(Debug, PartialEq, Eq, Clone)] pub enum HirLValue { Ident(HirIdent, Type), diff --git a/crates/noirc_frontend/src/hir_def/types.rs b/crates/noirc_frontend/src/hir_def/types.rs index 10b70ad2db1..17b6536e999 100644 --- a/crates/noirc_frontend/src/hir_def/types.rs +++ b/crates/noirc_frontend/src/hir_def/types.rs @@ -11,49 +11,38 @@ use noirc_errors::Span; use crate::{node_interner::StructId, Ident, Signedness}; -/// A shared, mutable reference to some T. -/// Wrapper is required for Hash impl of RefCell. -#[derive(Debug, Eq, PartialOrd, Ord)] -pub struct Shared(Rc>); - -impl std::hash::Hash for Shared { - fn hash(&self, state: &mut H) { - self.0.borrow().hash(state) - } -} - -impl PartialEq for Shared { - fn eq(&self, other: &Self) -> bool { - let ref1 = self.0.borrow(); - let ref2 = other.0.borrow(); - *ref1 == *ref2 - } -} +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub enum Type { + FieldElement(CompTime), + Array(Box, Box), // Array(4, Field) = [Field; 4] + Integer(CompTime, Signedness, u32), // u32 = Integer(unsigned, 32) + PolymorphicInteger(CompTime, TypeVariable), + Bool(CompTime), + String(Box), + Unit, + Struct(Shared, Vec), + Tuple(Vec), + TypeVariable(TypeVariable), -impl Clone for Shared { - fn clone(&self) -> Self { - Shared(self.0.clone()) - } -} + /// NamedGenerics are the 'T' or 'U' in a user-defined generic function + /// like `fn foo(...) {}`. Unlike TypeVariables, they cannot be bound over. + NamedGeneric(TypeVariable, Rc), -impl From for Shared { - fn from(thing: T) -> Shared { - Shared::new(thing) - } -} + /// A functions with arguments, and a return type. + Function(Vec, Box), -impl Shared { - pub fn new(thing: T) -> Shared { - Shared(Rc::new(RefCell::new(thing))) - } + /// A type generic over the given type variables. + /// Storing both the TypeVariableId and TypeVariable isn't necessary + /// but it makes handling them both easier. The TypeVariableId should + /// never be bound over during type checking, but during monomorphization it + /// will be and thus needs the full TypeVariable link. + Forall(Generics, Box), - pub fn borrow(&self) -> std::cell::Ref { - self.0.borrow() - } + /// A type-level integer. Included to let an Array's size type variable + /// bind to an integer without special checks to bind it to a non-type. + Constant(u64), - pub fn borrow_mut(&self) -> std::cell::RefMut { - self.0.borrow_mut() - } + Error, } /// A list of TypeVariableIds to bind to a type. Storing the @@ -61,9 +50,15 @@ impl Shared { /// the binding to later be undone if needed. pub type TypeBindings = HashMap; +/// Represents a struct type in the type system. Each instance of this +/// rust struct will be shared across all Type::Struct variants that represent +/// the same struct type. #[derive(Debug, Eq)] pub struct StructType { + /// A unique id representing this struct type. Used to check if two + /// struct types are equal. pub id: StructId, + pub name: Ident, /// Fields are ordered and private, they should only @@ -75,6 +70,10 @@ pub struct StructType { pub span: Span, } +/// Corresponds to generic lists such as `` in the source +/// program. The `TypeVariableId` portion is used to match two +/// type variables to check for equality, while the `TypeVariable` is +/// the actual part that can be mutated to bind it to another type. pub type Generics = Vec<(TypeVariableId, TypeVariable)>; impl std::hash::Hash for StructType { @@ -100,6 +99,10 @@ impl StructType { StructType { id, fields, name, span, generics } } + /// To account for cyclic references between structs, a struct's + /// fields are resolved strictly after the struct itself is initially + /// created. Therefore, this method is used to set the fields once they + /// become known. pub fn set_fields(&mut self, fields: BTreeMap) { assert!(self.fields.is_empty()); self.fields = fields; @@ -127,6 +130,7 @@ impl StructType { ) } + /// Returns all the fields of this type, after being applied to the given generic arguments. pub fn get_fields(&self, generic_args: &[Type]) -> BTreeMap { assert_eq!(self.generics.len(), generic_args.len()); @@ -163,38 +167,49 @@ impl std::fmt::Display for StructType { } } -#[derive(Debug, PartialEq, Eq, Clone, Hash)] -pub enum Type { - FieldElement(CompTime), - Array(Box, Box), // Array(4, Field) = [Field; 4] - Integer(CompTime, Signedness, u32), // u32 = Integer(unsigned, 32) - PolymorphicInteger(CompTime, TypeVariable), - Bool(CompTime), - String(Box), - Unit, - Struct(Shared, Vec), - Tuple(Vec), - TypeVariable(TypeVariable), +/// A shared, mutable reference to some T. +/// Wrapper is required for Hash impl of RefCell. +#[derive(Debug, Eq, PartialOrd, Ord)] +pub struct Shared(Rc>); - /// NamedGenerics are the 'T' or 'U' in a user-defined generic function - /// like `fn foo(...) {}`. Unlike TypeVariables, they cannot be bound over. - NamedGeneric(TypeVariable, Rc), +impl std::hash::Hash for Shared { + fn hash(&self, state: &mut H) { + self.0.borrow().hash(state) + } +} - /// A functions with arguments, and a return type. - Function(Vec, Box), +impl PartialEq for Shared { + fn eq(&self, other: &Self) -> bool { + let ref1 = self.0.borrow(); + let ref2 = other.0.borrow(); + *ref1 == *ref2 + } +} - /// A type generic over the given type variables. - /// Storing both the TypeVariableId and TypeVariable isn't necessary - /// but it makes handling them both easier. The TypeVariableId should - /// never be bound over during type checking, but during monomorphization it - /// will be and thus needs the full TypeVariable link. - Forall(Generics, Box), +impl Clone for Shared { + fn clone(&self) -> Self { + Shared(self.0.clone()) + } +} - /// A type-level integer. Included to let an Array's size type variable - /// bind to an integer without special checks to bind it to a non-type. - Constant(u64), +impl From for Shared { + fn from(thing: T) -> Shared { + Shared::new(thing) + } +} - Error, +impl Shared { + pub fn new(thing: T) -> Shared { + Shared(Rc::new(RefCell::new(thing))) + } + + pub fn borrow(&self) -> std::cell::Ref { + self.0.borrow() + } + + pub fn borrow_mut(&self) -> std::cell::RefMut { + self.0.borrow_mut() + } } /// A restricted subset of binary operators useable on @@ -208,8 +223,12 @@ pub enum BinaryTypeOperator { Modulo, } +/// A TypeVariable is a mutable reference that is either +/// bound to some type, or unbound with a given TypeVariableId. pub type TypeVariable = Shared; +/// TypeBindings are the mutable insides of a TypeVariable. +/// They are either bound to some type, or are unbound. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum TypeBinding { Bound(Type), @@ -222,16 +241,31 @@ impl TypeBinding { } } +/// A unique ID used to differentiate different type variables #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub struct TypeVariableId(pub usize); +/// Noir's type system keeps track of whether or not every primitive type's value +/// is known at compile-time. This is exposed through users through the `comptime` +/// keyword in noir which can be prefixed before primitive types. A usage like +/// `t: comptime Field` would correspond to a Field type with a CompTime::Yes(None) +/// variant of this enum +/// +/// Note that whether or not a variable is comptime can also be inferred based on its use. +/// A value passed to a function that expects a `comptime Field` must be CompTime::Yes, +/// likewise a parameter of the current function that is just a `Field` can only be CompTime::No. +/// There is also the case of integer literals which are typed as CompTime::Maybe. These are +/// polymorphically comptime because they can be used in both contexts. #[derive(Debug, Clone, Eq)] pub enum CompTime { // Yes and No variants have optional spans representing the location in the source code // which caused them to be compile time. Yes(Option), No(Option), - Maybe(TypeVariableId, Rc>>), + + /// Maybe has an id and shared inner reference that can be rebound later to + /// another specific CompTime variant. + Maybe(TypeVariableId, Shared>), } impl std::hash::Hash for CompTime { @@ -278,9 +312,11 @@ pub enum SpanKind { impl CompTime { pub fn new(interner: &mut NodeInterner) -> Self { let id = interner.next_type_variable_id(); - Self::Maybe(id, Rc::new(RefCell::new(None))) + Self::Maybe(id, Shared::new(None)) } + /// Set the Span on this CompTime (if it has one) to keep track of + /// when it was last changed to give better error messages. fn set_span(&mut self, new_span: Span) { match self { CompTime::Yes(span) | CompTime::No(span) => *span = Some(new_span), @@ -978,6 +1014,8 @@ impl Type { } } + /// If this type is a Type::Constant (used in array lengths), or is bound + /// to a Type::Constant, return the constant as a u64. pub fn evaluate_to_u64(&self) -> Option { match self { Type::PolymorphicInteger(_, binding) @@ -1159,8 +1197,7 @@ impl Type { } } - /// True if the given TypeVariableId is free anywhere - /// within self + /// True if the given TypeVariableId is free anywhere within self fn occurs(&self, target_id: TypeVariableId) -> bool { match self { Type::Array(len, elem) => len.occurs(target_id) || elem.occurs(target_id),