From 40eac5b15557b05414a7be9c91d0bb70d8706b06 Mon Sep 17 00:00:00 2001 From: Tbkhi <157125900+Tbkhi@users.noreply.github.com> Date: Mon, 26 Feb 2024 09:16:41 -0400 Subject: [PATCH 1/3] Update serialization.md Minor documentation updates. --- src/serialization.md | 51 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/src/serialization.md b/src/serialization.md index 74c560071..dc561d26e 100644 --- a/src/serialization.md +++ b/src/serialization.md @@ -1,15 +1,16 @@ # Serialization in Rustc -Rustc has to [serialize] and deserialize various data during compilation. -Specifically: +Rust's compiler has to [serialize] and deserialize various data during +compilation. Specifically: -- "Crate metadata", mainly query outputs, are serialized in a binary - format into `rlib` and `rmeta` files that are output when compiling a library - crate, these are then deserialized by crates that depend on that library. +- Certain crate metadata, consisting mainly of query outputs, are serialized + from a binary format into `rlib` and `rmeta` files that are output when + compiling a library crate. These `rlib` and `rmeta` files are then + deserialized by the crates which depend on that library. - Certain query outputs are serialized in a binary format to [persist incremental compilation results]. -- [`CrateInfo`] is serialized to json when the `-Z no-link` flag is used, and - deserialized from json when the `-Z link-only` flag is used. +- [`CrateInfo`] is serialized to `JSON` when the `-Z no-link` flag is used, and + deserialized from `JSON` when the `-Z link-only` flag is used. ## The `Encodable` and `Decodable` traits @@ -30,7 +31,7 @@ types, `bool`, `char`, `str` and various common standard library types. For types that are constructed from those types, `Encodable` and `Decodable` are usually implemented by [derives]. These generate implementations that forward -deserialization to the fields of the struct or enum. 
For a struct those impls +deserialization to the fields of the `struct` or `enum`. For a `struct` those `impl`s look something like this: ```rust,ignore @@ -51,6 +52,7 @@ impl Encodable for MyStruct { }) } } + impl Decodable for MyStruct { fn decode(s: &mut D) -> Result { s.read_struct("MyStruct", 2, |d| { @@ -65,13 +67,13 @@ impl Decodable for MyStruct { ## Encoding and Decoding arena allocated types -Rustc has a lot of [arena allocated types]. Deserializing these types isn't -possible without access to the arena that they need to be allocated on. The -[`TyDecoder`] and [`TyEncoder`] traits are supertraits of `Decoder` and +Rust's compiler has a lot of [arena allocated types]. Deserializing these types +isn't possible without access to the `arena` that they need to be allocated on. +The [`TyDecoder`] and [`TyEncoder`] `trait`s are supertraits of `Decoder` and `Encoder` that allow access to a `TyCtxt`. -Types which contain arena allocated types can then bound the type parameter of -their `Encodable` and `Decodable` implementations with these traits. For +Types which contain `arena` allocated types can then bound the type parameter +of their `Encodable` and `Decodable` implementations with these `trait`s. For example ```rust,ignore @@ -83,7 +85,7 @@ impl<'tcx, D: TyDecoder<'tcx>> Decodable for MyStruct<'tcx> { The `TyEncodable` and `TyDecodable` [derive macros][derives] will expand to such an implementation. -Decoding the actual arena allocated type is harder, because some of the +Decoding the actual `arena` allocated type is harder, because some of the implementations can't be written due to the orphan rules. To work around this, the [`RefDecodable`] trait is defined in `rustc_middle`. This can then be implemented for any type. The `TyDecodable` macro will call `RefDecodable` to @@ -117,7 +119,7 @@ and `Encodable`. `Ty` can be deeply recursive, if each `Ty` was encoded naively then crate metadata would be very large. 
To handle this, each `TyEncoder` has a cache of locations in its output where it has serialized types. If a type being encoded -is in the cache, then instead of serializing the type as usual, the byte offset +is in cache, then instead of serializing the type as usual, the byte offset within the file being written is encoded instead. A similar scheme is used for `ty::Predicate`. @@ -131,7 +133,7 @@ The [`LazyValue`] type wraps the (relative) offset in the crate metadata wher The `LazyArray<[T]>` and `LazyTable` types provide some functionality over `Lazy>` and `Lazy>`: -- It's possible to encode a `LazyArray` directly from an iterator, without +- It's possible to encode a `LazyArray` directly from an `Iterator`, without first collecting into a `Vec`. - Indexing into a `LazyTable` does not require decoding entries other than the one being read. @@ -142,15 +144,9 @@ time. Instead the query system is the main way of caching these results. ## Specialization A few types, most notably `DefId`, need to have different implementations for -different `Encoder`s. This is currently handled by ad-hoc specializations: -`DefId` has a `default` implementation of `Encodable` and a specialized one -for `Encodable`. - -[arena allocated types]: memory.md -[AST]: the-parser.md -[derives]: #derive-macros -[persist incremental compilation results]: queries/incremental-compilation-in-detail.md#the-real-world-how-persistence-makes-everything-complicated -[serialize]: https://en.wikipedia.org/wiki/Serialization +different `Encoder`s. This is currently handled by ad-hoc specializations, for +example: `DefId` has a `default` implementation of `Encodable` and a +specialized one for `Encodable`. [`CrateInfo`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/struct.CrateInfo.html [`LazyArray`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.LazyValue.html @@ -162,3 +158,8 @@ for `Encodable`. 
[`rustc_serialize`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_serialize/index.html [`TyDecoder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.TyDecoder.html [`TyEncoder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.TyEncoder.html +[arena allocated types]: memory.md +[AST]: the-parser.md +[derives]: #derive-macros +[persist incremental compilation results]: queries/incremental-compilation-in-detail.md#the-real-world-how-persistence-makes-everything-complicated +[serialize]: https://en.wikipedia.org/wiki/Serialization From b3a26684450e260a171cdc75842fabb5e7cf07b7 Mon Sep 17 00:00:00 2001 From: Tbkhi Date: Tue, 5 Mar 2024 10:34:12 -0400 Subject: [PATCH 2/3] updates documentation --- src/serialization.md | 94 +++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 37 deletions(-) diff --git a/src/serialization.md b/src/serialization.md index dc561d26e..a518c69ec 100644 --- a/src/serialization.md +++ b/src/serialization.md @@ -12,6 +12,10 @@ compilation. Specifically: - [`CrateInfo`] is serialized to `JSON` when the `-Z no-link` flag is used, and deserialized from `JSON` when the `-Z link-only` flag is used. +[`CrateInfo`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/struct.CrateInfo.html +[persist incremental compilation results]: queries/incremental-compilation-in-detail.md#the-real-world-how-persistence-makes-everything-complicated +[serialize]: https://en.wikipedia.org/wiki/Serialization + ## The `Encodable` and `Decodable` traits The [`rustc_serialize`] crate defines two traits for types which can be serialized: @@ -26,13 +30,14 @@ pub trait Decodable: Sized { } ``` -It also defines implementations of these for integer types, floating point -types, `bool`, `char`, `str` and various common standard library types. 
+It also defines implementations of these for various common standard library +[primitive types](https://doc.rust-lang.org/std/#primitives) such as integer +types, floating point types, `bool`, `char`, `str`, etc. -For types that are constructed from those types, `Encodable` and `Decodable` are -usually implemented by [derives]. These generate implementations that forward -deserialization to the fields of the `struct` or `enum`. For a `struct` those `impl`s -look something like this: +For types that are constructed from those types, `Encodable` and `Decodable` +are usually implemented by [derives]. These generate implementations that +forward deserialization to the `field`s of the `struct` or `enum`. For a +`struct` those `impl`s look something like this: ```rust,ignore #![feature(rustc_private)] @@ -64,16 +69,17 @@ impl Decodable for MyStruct { } } ``` +[`rustc_serialize`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_serialize/index.html ## Encoding and Decoding arena allocated types Rust's compiler has a lot of [arena allocated types]. Deserializing these types isn't possible without access to the `arena` that they need to be allocated on. -The [`TyDecoder`] and [`TyEncoder`] `trait`s are supertraits of `Decoder` and -`Encoder` that allow access to a `TyCtxt`. +The [`TyDecoder`] and [`TyEncoder`] `trait`s are supertraits of [`Decoder`] and +[`Encoder`] that allow access to a [`TyCtxt`]. Types which contain `arena` allocated types can then bound the type parameter -of their `Encodable` and `Decodable` implementations with these `trait`s. For +of their [`Encodable`] and [`Decodable`] implementations with these `trait`s. For example ```rust,ignore @@ -82,38 +88,62 @@ impl<'tcx, D: TyDecoder<'tcx>> Decodable for MyStruct<'tcx> { } ``` -The `TyEncodable` and `TyDecodable` [derive macros][derives] will expand to such +The [`TyEncodable`] and [`TyDecodable`] [derive macros][derives] will expand to such an implementation. 
Decoding the actual `arena` allocated type is harder, because some of the -implementations can't be written due to the orphan rules. To work around this, -the [`RefDecodable`] trait is defined in `rustc_middle`. This can then be +implementations can't be written due to the [orphan rules]. To work around this, +the [`RefDecodable`] trait is defined in [`rustc_middle`]. This can then be implemented for any type. The `TyDecodable` macro will call `RefDecodable` to decode references, but various generic code needs types to actually be `Decodable` with a specific decoder. For interned types instead of manually implementing `RefDecodable`, using a new -type wrapper, like `ty::Predicate` and manually implementing `Encodable` and +type wrapper, like [`ty::Predicate`] and manually implementing `Encodable` and `Decodable` may be simpler. +[`Decodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_serialize/trait.Decodable.html +[`Decoder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_serialize/trait.Decoder.html +[`Encodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_serialize/trait.Encodable.html +[`Encoder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_serialize/trait.Encoder.html +[`RefDecodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.RefDecodable.html +[`rustc_middle`]: https://github.com/rust-lang/rust/tree/master/compiler/rustc_middle +[`ty::Predicate`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/predicate/struct.Predicate.html +[`TyCtxt`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/struct.TyCtxt.html +[`TyDecodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_macros/derive.TyDecodable.html +[`TyDecoder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.TyDecoder.html +[`TyEncodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_macros/derive.TyEncodable.html +[`TyEncoder`]: 
https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.TyEncoder.html +[arena allocated types]: memory.md +[derives]: #derive-macros +[orphan rules]: https://doc.rust-lang.org/reference/items/implementations.html#orphan-rules + ## Derive macros -The `rustc_macros` crate defines various derives to help implement `Decodable` +The [`rustc_macros`] crate defines various derives to help implement `Decodable` and `Encodable`. - The `Encodable` and `Decodable` macros generate implementations that apply to all `Encoders` and `Decoders`. These should be used in crates that don't - depend on `rustc_middle`, or that have to be serialized by a type that does + depend on [`rustc_middle`], or that have to be serialized by a type that does not implement `TyEncoder`. -- `MetadataEncodable` and `MetadataDecodable` generate implementations that +- [`MetadataEncodable`] and [`MetadataDecodable`] generate implementations that only allow decoding by [`rustc_metadata::rmeta::encoder::EncodeContext`] and [`rustc_metadata::rmeta::decoder::DecodeContext`]. These are used for types - that contain `rustc_metadata::rmeta::Lazy*`. + that contain [`rustc_metadata::rmeta::`]`Lazy*`. - `TyEncodable` and `TyDecodable` generate implementation that apply to any `TyEncoder` or `TyDecoder`. These should be used for types that are only serialized in crate metadata and/or the incremental cache, which is most serializable types in `rustc_middle`. 
+[`MetadataDecodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_macros/derive.MetadataDecodable.html +[`MetadataEncodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_macros/derive.MetadataEncodable.html +[`rustc_macros`]: https://github.com/rust-lang/rust/tree/master/compiler/rustc_macros +[`rustc_metadata::rmeta::`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/index.html +[`rustc_metadata::rmeta::decoder::DecodeContext`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/decoder/struct.DecodeContext.html +[`rustc_metadata::rmeta::encoder::EncodeContext`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/encoder/struct.EncodeContext.html +[`rustc_middle`]: https://github.com/rust-lang/rust/tree/master/compiler/rustc_middle + ## Shorthands `Ty` can be deeply recursive, if each `Ty` was encoded naively then crate @@ -127,8 +157,9 @@ within the file being written is encoded instead. A similar scheme is used for Crate metadata is initially loaded before the `TyCtxt<'tcx>` is created, so some deserialization needs to be deferred from the initial loading of metadata. -The [`LazyValue`] type wraps the (relative) offset in the crate metadata where a -`T` has been serialized. There are also some variants, [`LazyArray`] and [`LazyTable`]. +The [`LazyValue`] type wraps the (relative) offset in the crate metadata +where a `T` has been serialized. There are also some variants, [`LazyArray`] +and [`LazyTable`]. The `LazyArray<[T]>` and `LazyTable` types provide some functionality over `Lazy>` and `Lazy>`: @@ -138,8 +169,13 @@ The `LazyArray<[T]>` and `LazyTable` types provide some functionality over - Indexing into a `LazyTable` does not require decoding entries other than the one being read. -**note**: `LazyValue` does not cache its value after being deserialized the first -time. Instead the query system is the main way of caching these results. 
+**note**: `LazyValue` does not cache its value after being deserialized the +first time. Instead the query system itself is the main way of caching these +results. + +[`LazyArray`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.LazyArray.html +[`LazyTable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.LazyTable.html +[`LazyValue`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.LazyValue.html ## Specialization @@ -147,19 +183,3 @@ A few types, most notably `DefId`, need to have different implementations for different `Encoder`s. This is currently handled by ad-hoc specializations, for example: `DefId` has a `default` implementation of `Encodable` and a specialized one for `Encodable`. - -[`CrateInfo`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_codegen_ssa/struct.CrateInfo.html -[`LazyArray`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.LazyValue.html -[`LazyTable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.LazyValue.html -[`LazyValue`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/struct.LazyValue.html -[`RefDecodable`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.RefDecodable.html -[`rustc_metadata::rmeta::decoder::DecodeContext`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/decoder/struct.DecodeContext.html -[`rustc_metadata::rmeta::encoder::EncodeContext`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_metadata/rmeta/encoder/struct.EncodeContext.html -[`rustc_serialize`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_serialize/index.html -[`TyDecoder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.TyDecoder.html -[`TyEncoder`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_middle/ty/codec/trait.TyEncoder.html -[arena allocated types]: memory.md -[AST]: 
the-parser.md -[derives]: #derive-macros -[persist incremental compilation results]: queries/incremental-compilation-in-detail.md#the-real-world-how-persistence-makes-everything-complicated -[serialize]: https://en.wikipedia.org/wiki/Serialization From 4ad3f77e3a1309c160de2bc1f7e7682c2227c234 Mon Sep 17 00:00:00 2001 From: Noratrieb <48135649+Noratrieb@users.noreply.github.com> Date: Tue, 24 Sep 2024 20:32:42 +0200 Subject: [PATCH 3/3] minor edits --- src/serialization.md | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/serialization.md b/src/serialization.md index a518c69ec..0ff049901 100644 --- a/src/serialization.md +++ b/src/serialization.md @@ -1,9 +1,9 @@ # Serialization in Rustc -Rust's compiler has to [serialize] and deserialize various data during -compilation. Specifically: +rustc has to [serialize] and deserialize various data during compilation. +Specifically: -- Certain crate metadata, consisting mainly of query outputs, are serialized +- "Crate metadata", consisting mainly of query outputs, are serialized from a binary format into `rlib` and `rmeta` files that are output when compiling a library crate. These `rlib` and `rmeta` files are then deserialized by the crates which depend on that library. @@ -36,8 +36,8 @@ types, floating point types, `bool`, `char`, `str`, etc. For types that are constructed from those types, `Encodable` and `Decodable` are usually implemented by [derives]. These generate implementations that -forward deserialization to the `field`s of the `struct` or `enum`. For a -`struct` those `impl`s look something like this: +forward deserialization to the fields of the struct or enum. For a +struct those impls look something like this: ```rust,ignore #![feature(rustc_private)] @@ -73,14 +73,13 @@ impl Decodable for MyStruct { ## Encoding and Decoding arena allocated types -Rust's compiler has a lot of [arena allocated types]. 
Deserializing these types -isn't possible without access to the `arena` that they need to be allocated on. -The [`TyDecoder`] and [`TyEncoder`] `trait`s are supertraits of [`Decoder`] and -[`Encoder`] that allow access to a [`TyCtxt`]. +rustc has a lot of [arena allocated types]. +Deserializing these types isn't possible without access to the arena that they need to be allocated on. +The [`TyDecoder`] and [`TyEncoder`] traits are supertraits of [`Decoder`] and [`Encoder`] that allow access to a [`TyCtxt`]. -Types which contain `arena` allocated types can then bound the type parameter -of their [`Encodable`] and [`Decodable`] implementations with these `trait`s. For -example +Types which contain `arena` allocated types can then bound the type parameter of their +[`Encodable`] and [`Decodable`] implementations with these traits. +For example ```rust,ignore impl<'tcx, D: TyDecoder<'tcx>> Decodable for MyStruct<'tcx> { @@ -149,7 +148,7 @@ and `Encodable`. `Ty` can be deeply recursive, if each `Ty` was encoded naively then crate metadata would be very large. To handle this, each `TyEncoder` has a cache of locations in its output where it has serialized types. If a type being encoded -is in cache, then instead of serializing the type as usual, the byte offset +is in the cache, then instead of serializing the type as usual, the byte offset within the file being written is encoded instead. A similar scheme is used for `ty::Predicate`.