Skip to content

Commit

Permalink
Auto merge of #100209 - cjgillot:source-file-index, r=estebank
Browse files Browse the repository at this point in the history
Lazily decode SourceFile from metadata

Currently, source files from foreign crates are decoded up-front from metadata.
Spans from those crates are then matched to the corresponding source file by binary search among those files.

This PR changes the strategy by matching spans to files during encoding. This allows source files to be decoded on demand instead of up front. The on-disk format for spans becomes: `<tag> <position from start of file> <length> <file index> <crate (if foreign file)>`.
  • Loading branch information
bors committed Aug 19, 2022
2 parents 6c943ba + 0d41f91 commit 468887e
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 224 deletions.
228 changes: 102 additions & 126 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ pub(crate) struct CrateMetadata {
/// Proc macro descriptions for this crate, if it's a proc macro crate.
raw_proc_macros: Option<&'static [ProcMacro]>,
/// Source maps for code from the crate.
source_map_import_info: OnceCell<Vec<ImportedSourceFile>>,
source_map_import_info: Lock<Vec<Option<ImportedSourceFile>>>,
/// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`.
def_path_hash_map: DefPathHashMapRef<'static>,
/// Likewise for ExpnHash.
Expand Down Expand Up @@ -143,7 +143,8 @@ pub(crate) struct CrateMetadata {
}

/// Holds information about a rustc_span::SourceFile imported from another crate.
/// See `imported_source_files()` for more information.
/// See `imported_source_file()` for more information.
#[derive(Clone)]
struct ImportedSourceFile {
/// This SourceFile's byte-offset within the source_map of its original crate
original_start_pos: rustc_span::BytePos,
Expand All @@ -160,9 +161,6 @@ pub(super) struct DecodeContext<'a, 'tcx> {
sess: Option<&'tcx Session>,
tcx: Option<TyCtxt<'tcx>>,

// Cache the last used source_file for translating spans as an optimization.
last_source_file_index: usize,

lazy_state: LazyState,

// Used for decoding interpret::AllocIds in a cached & thread-safe manner.
Expand Down Expand Up @@ -191,7 +189,6 @@ pub(super) trait Metadata<'a, 'tcx>: Copy {
blob: self.blob(),
sess: self.sess().or(tcx.map(|tcx| tcx.sess)),
tcx,
last_source_file_index: 0,
lazy_state: LazyState::NoNode,
alloc_decoding_session: self
.cdata()
Expand Down Expand Up @@ -527,6 +524,9 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
bug!("Cannot decode Span without Session.")
};

// Index of the file in the corresponding crate's list of encoded files.
let metadata_index = u32::decode(decoder);

// There are two possibilities here:
// 1. This is a 'local span', which is located inside a `SourceFile`
// that came from this crate. In this case, we use the source map data
Expand All @@ -553,10 +553,10 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
// to be based on the *foreign* crate (e.g. crate C), not the crate
// we are writing metadata for (e.g. crate B). This allows us to
// treat the 'local' and 'foreign' cases almost identically during deserialization:
// we can call `imported_source_files` for the proper crate, and binary search
// we can call `imported_source_file` for the proper crate, and binary search
// through the returned slice using our span.
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
decoder.cdata().imported_source_files(sess)
let source_file = if tag == TAG_VALID_SPAN_LOCAL {
decoder.cdata().imported_source_file(metadata_index, sess)
} else {
// When we encode a proc-macro crate, all `Span`s should be encoded
// with `TAG_VALID_SPAN_LOCAL`
Expand All @@ -577,60 +577,30 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
cnum
);

// Decoding 'foreign' spans should be rare enough that it's
// not worth it to maintain a per-CrateNum cache for `last_source_file_index`.
// We just set it to 0, to ensure that we don't try to access something out
// of bounds for our initial 'guess'
decoder.last_source_file_index = 0;

let foreign_data = decoder.cdata().cstore.get_crate_data(cnum);
foreign_data.imported_source_files(sess)
foreign_data.imported_source_file(metadata_index, sess)
};

let source_file = {
// Optimize for the case that most spans within a translated item
// originate from the same source_file.
let last_source_file = &imported_source_files[decoder.last_source_file_index];

if lo >= last_source_file.original_start_pos && lo <= last_source_file.original_end_pos
{
last_source_file
} else {
let index = imported_source_files
.binary_search_by_key(&lo, |source_file| source_file.original_start_pos)
.unwrap_or_else(|index| index - 1);

// Don't try to cache the index for foreign spans,
// as this would require a map from CrateNums to indices
if tag == TAG_VALID_SPAN_LOCAL {
decoder.last_source_file_index = index;
}
&imported_source_files[index]
}
};

// Make sure our binary search above is correct.
// Make sure our span is well-formed.
debug_assert!(
lo >= source_file.original_start_pos && lo <= source_file.original_end_pos,
"Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
lo + source_file.original_start_pos <= source_file.original_end_pos,
"Malformed encoded span: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
lo,
source_file.original_start_pos,
source_file.original_end_pos
);

// Make sure we correctly filtered out invalid spans during encoding
// Make sure we correctly filtered out invalid spans during encoding.
debug_assert!(
hi >= source_file.original_start_pos && hi <= source_file.original_end_pos,
"Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
hi + source_file.original_start_pos <= source_file.original_end_pos,
"Malformed encoded span: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
hi,
source_file.original_start_pos,
source_file.original_end_pos
);

let lo =
(lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
let hi =
(hi + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
let lo = lo + source_file.translated_source_file.start_pos;
let hi = hi + source_file.translated_source_file.start_pos;

// Do not try to decode parent for foreign spans.
Span::new(lo, hi, ctxt, None)
Expand Down Expand Up @@ -1482,7 +1452,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
///
/// Proc macro crates don't currently export spans, so this function does not have
/// to work for them.
fn imported_source_files(self, sess: &Session) -> &'a [ImportedSourceFile] {
fn imported_source_file(self, source_file_index: u32, sess: &Session) -> ImportedSourceFile {
fn filter<'a>(sess: &Session, path: Option<&'a Path>) -> Option<&'a Path> {
path.filter(|_| {
// Only spend time on further checks if we have what to translate *to*.
Expand Down Expand Up @@ -1570,90 +1540,96 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
}
};

self.cdata.source_map_import_info.get_or_init(|| {
let external_source_map = self.root.source_map.decode(self);

external_source_map
.map(|source_file_to_import| {
// We can't reuse an existing SourceFile, so allocate a new one
// containing the information we need.
let rustc_span::SourceFile {
mut name,
src_hash,
start_pos,
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
..
} = source_file_to_import;

// If this file is under $sysroot/lib/rustlib/src/ but has not been remapped
// during rust bootstrapping by `remap-debuginfo = true`, and the user
// wish to simulate that behaviour by -Z simulate-remapped-rust-src-base,
// then we change `name` to a similar state as if the rust was bootstrapped
// with `remap-debuginfo = true`.
// This is useful for testing so that tests about the effects of
// `try_to_translate_virtual_to_real` don't have to worry about how the
// compiler is bootstrapped.
if let Some(virtual_dir) =
&sess.opts.unstable_opts.simulate_remapped_rust_src_base
{
if let Some(real_dir) = &sess.opts.real_rust_source_base_dir {
if let rustc_span::FileName::Real(ref mut old_name) = name {
if let rustc_span::RealFileName::LocalPath(local) = old_name {
if let Ok(rest) = local.strip_prefix(real_dir) {
*old_name = rustc_span::RealFileName::Remapped {
local_path: None,
virtual_name: virtual_dir.join(rest),
};
}
let mut import_info = self.cdata.source_map_import_info.lock();
for _ in import_info.len()..=(source_file_index as usize) {
import_info.push(None);
}
import_info[source_file_index as usize]
.get_or_insert_with(|| {
let source_file_to_import = self
.root
.source_map
.get(self, source_file_index)
.expect("missing source file")
.decode(self);

// We can't reuse an existing SourceFile, so allocate a new one
// containing the information we need.
let rustc_span::SourceFile {
mut name,
src_hash,
start_pos,
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
..
} = source_file_to_import;

// If this file is under $sysroot/lib/rustlib/src/ but has not been remapped
// during rust bootstrapping by `remap-debuginfo = true`, and the user
// wish to simulate that behaviour by -Z simulate-remapped-rust-src-base,
// then we change `name` to a similar state as if the rust was bootstrapped
// with `remap-debuginfo = true`.
// This is useful for testing so that tests about the effects of
// `try_to_translate_virtual_to_real` don't have to worry about how the
// compiler is bootstrapped.
if let Some(virtual_dir) = &sess.opts.unstable_opts.simulate_remapped_rust_src_base
{
if let Some(real_dir) = &sess.opts.real_rust_source_base_dir {
if let rustc_span::FileName::Real(ref mut old_name) = name {
if let rustc_span::RealFileName::LocalPath(local) = old_name {
if let Ok(rest) = local.strip_prefix(real_dir) {
*old_name = rustc_span::RealFileName::Remapped {
local_path: None,
virtual_name: virtual_dir.join(rest),
};
}
}
}
}
}

// If this file's path has been remapped to `/rustc/$hash`,
// we might be able to reverse that (also see comments above,
// on `try_to_translate_virtual_to_real`).
try_to_translate_virtual_to_real(&mut name);

let source_length = (end_pos - start_pos).to_usize();

let local_version = sess.source_map().new_imported_source_file(
name,
src_hash,
name_hash,
source_length,
self.cnum,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
start_pos,
end_pos,
);
debug!(
"CrateMetaData::imported_source_files alloc \
// If this file's path has been remapped to `/rustc/$hash`,
// we might be able to reverse that (also see comments above,
// on `try_to_translate_virtual_to_real`).
try_to_translate_virtual_to_real(&mut name);

let source_length = (end_pos - start_pos).to_usize();

let local_version = sess.source_map().new_imported_source_file(
name,
src_hash,
name_hash,
source_length,
self.cnum,
lines,
multibyte_chars,
non_narrow_chars,
normalized_pos,
start_pos,
source_file_index,
);
debug!(
"CrateMetaData::imported_source_files alloc \
source_file {:?} original (start_pos {:?} end_pos {:?}) \
translated (start_pos {:?} end_pos {:?})",
local_version.name,
start_pos,
end_pos,
local_version.start_pos,
local_version.end_pos
);
local_version.name,
start_pos,
end_pos,
local_version.start_pos,
local_version.end_pos
);

ImportedSourceFile {
original_start_pos: start_pos,
original_end_pos: end_pos,
translated_source_file: local_version,
}
})
.collect()
})
ImportedSourceFile {
original_start_pos: start_pos,
original_end_pos: end_pos,
translated_source_file: local_version,
}
})
.clone()
}

fn get_generator_diagnostic_data(
Expand Down Expand Up @@ -1716,7 +1692,7 @@ impl CrateMetadata {
trait_impls,
incoherent_impls: Default::default(),
raw_proc_macros,
source_map_import_info: OnceCell::new(),
source_map_import_info: Lock::new(Vec::new()),
def_path_hash_map,
expn_hash_map: Default::default(),
alloc_decoding_state,
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,9 @@ impl CrateStore for CStore {
}

fn import_source_files(&self, sess: &Session, cnum: CrateNum) {
self.get_crate_data(cnum).imported_source_files(sess);
let cdata = self.get_crate_data(cnum);
for file_index in 0..cdata.root.source_map.size() {
cdata.imported_source_file(file_index as u32, sess);
}
}
}
Loading

0 comments on commit 468887e

Please sign in to comment.