From bb9f8e11cfa8f5c1883b47158354ab3a87ab497b Mon Sep 17 00:00:00 2001 From: Philip Conrad Date: Wed, 29 May 2024 18:26:08 -0400 Subject: [PATCH] bundle: Preallocate buffers for file contents. This commit adds logic to preallocate buffers when loading files from both tarballs and on-disk bundle directories. The change results in lower max RSS memory usage at runtime, and better garbage collector performance, especially when at lower values of GOMAXPROCS. For very large bundles (>1 GB in size), this change can lower startup times for OPA by as much as a full second. The performance analysis was different than for most changes-- heap usage increased by about 10% during bundle loading, which made the change look bad at first. Some of the effect appears to be from the Go compiler no longer inlining as far up the call chain during bundle loading (visible in the `pprof` graphs). Running with `GODEBUG=gctrace=1` and varying GOMAXPROCS allowed seeing a fuller picture of how performance changes from preallocation, which results in much less garbage for the collector, and a noticeable speedup in wall-clock time the GC burns during bundle loading. Signed-off-by: Philip Conrad --- bundle/bundle.go | 41 +++++++++++++++++++++++++++++++++++++++++ bundle/file.go | 7 ++++--- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/bundle/bundle.go b/bundle/bundle.go index 9c02568b535..e7c7d216664 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -15,6 +15,7 @@ import ( "fmt" "io" "net/url" + "os" "path" "path/filepath" "reflect" @@ -1667,6 +1668,7 @@ func preProcessBundle(loader DirectoryLoader, skipVerify bool, sizeLimitBytes in } func readFile(f *Descriptor, sizeLimitBytes int64) (bytes.Buffer, error) { + // Case for pre-loaded byte buffers, like those from the tarballLoader. if bb, ok := f.reader.(*bytes.Buffer); ok { _ = f.Close() // always close, even on error @@ -1678,6 +1680,37 @@ func readFile(f *Descriptor, sizeLimitBytes int64) (bytes.Buffer, error) { return *bb, nil } + // Case for *lazyFile readers: + if lf, ok := f.reader.(*lazyFile); ok { + var buf bytes.Buffer + if lf.file == nil { + var err error + if lf.file, err = os.Open(lf.path); err != nil { + return buf, fmt.Errorf("failed to open file %s: %w", f.path, err) + } + } + // Bail out if we can't read the whole file-- there's nothing useful we can do at that point! + fileSize, _ := fstatFileSize(lf.file) + if fileSize > sizeLimitBytes { + return buf, fmt.Errorf(maxSizeLimitBytesErrMsg, strings.TrimPrefix(f.Path(), "/"), fileSize, sizeLimitBytes-1) + } + // Prealloc the buffer for the file read. + buffer := make([]byte, fileSize) + _, err := lf.file.Read(buffer) + if err != nil { + return buf, err + } + _ = lf.file.Close() // always close, even on error + + // Note(philipc): Replace the lazyFile reader in the *Descriptor with a + // pointer to the wrapping bytes.Buffer, so that we don't re-read the + // file on disk again by accident. + buf = *bytes.NewBuffer(buffer) + f.reader = &buf + return buf, nil + } + + // Fallback case: var buf bytes.Buffer n, err := f.Read(&buf, sizeLimitBytes) _ = f.Close() // always close, even on error @@ -1691,6 +1724,14 @@ func readFile(f *Descriptor, sizeLimitBytes int64) (bytes.Buffer, error) { return buf, nil } +// Takes an already open file handle and invokes the os.Stat system call on it +// to determine the file's size. +func fstatFileSize(f *os.File) (int64, error) { + fileInfo, err := f.Stat() + fileSize := fileInfo.Size() + return fileSize, err +} + func normalizePath(p string) string { return filepath.ToSlash(p) } diff --git a/bundle/file.go b/bundle/file.go index 62ffeeff5fd..2b9ba141774 100644 --- a/bundle/file.go +++ b/bundle/file.go @@ -370,12 +370,13 @@ func (t *tarballLoader) NextFile() (*Descriptor, error) { f := file{name: header.Name} - var buf bytes.Buffer - if _, err := io.Copy(&buf, t.tr); err != nil { + // Note(philipc): We rely on the previous size check in this loop for safety. + buf := bytes.NewBuffer(make([]byte, 0, header.Size)) + if _, err := io.Copy(buf, t.tr); err != nil { return nil, fmt.Errorf("failed to copy file %s: %w", header.Name, err) } - f.reader = &buf + f.reader = buf t.files = append(t.files, f) } else if header.Typeflag == tar.TypeDir {