From 15c20536c7bbbd8a2a45374e33825e58277b89a8 Mon Sep 17 00:00:00 2001 From: Janusz Marcinkiewicz Date: Fri, 14 Jun 2024 14:37:41 +0200 Subject: [PATCH] ios: replace `du` with raw syscalls Signed-off-by: Janusz Marcinkiewicz --- fs/fs.go | 3 +- ios/diskstats_darwin.go | 2 +- ios/fsutils_darwin.go | 32 +++++++--- ios/fsutils_linux.go | 126 ++++++++++++++++++++++++++++++++++++++-- ios/fsutils_test.go | 93 ++++++++++++++--------------- ios/fsutils_unix.go | 49 ---------------- ios/ios_suite_test.go | 18 ++++++ tools/file.go | 23 +++++--- 8 files changed, 224 insertions(+), 122 deletions(-) create mode 100644 ios/ios_suite_test.go diff --git a/fs/fs.go b/fs/fs.go index 3660489080..0885f6c410 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -467,7 +467,6 @@ func (mi *Mountpath) onDiskSize(bck *cmn.Bck, prefix string) (uint64, error) { } else { dirPath = filepath.Join(mi.MakePathCT(bck, ObjectType), prefix) if cos.Stat(dirPath) != nil { - dirPath += "*" // prefix is _not_ a directory withNonDirPrefix = true // ok to fail matching } } @@ -1062,7 +1061,7 @@ func OnDiskSize(bck *cmn.Bck, prefix string) (size uint64) { sz, err := mi.onDiskSize(bck, prefix) if err != nil { if cmn.Rom.FastV(4, cos.SmoduleFS) { - nlog.Warningln("failed to 'du':", err, "["+mi.String(), bck.String(), prefix+"]") + nlog.Warningln("failed to calculate size on disk:", err, "["+mi.String(), bck.String(), prefix+"]") } return 0 } diff --git a/ios/diskstats_darwin.go b/ios/diskstats_darwin.go index d5df9c4535..3436a64963 100644 --- a/ios/diskstats_darwin.go +++ b/ios/diskstats_darwin.go @@ -1,7 +1,7 @@ // Package ios is a collection of interfaces to the local storage subsystem; // the package includes OS-dependent implementations for those interfaces. /* - * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 
*/ package ios diff --git a/ios/fsutils_darwin.go b/ios/fsutils_darwin.go index fe5569529a..12ca276662 100644 --- a/ios/fsutils_darwin.go +++ b/ios/fsutils_darwin.go @@ -1,24 +1,42 @@ // Package ios is a collection of interfaces to the local storage subsystem; // the package includes OS-dependent implementations for those interfaces. /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. */ package ios import ( "os" - "os/exec" + "path/filepath" + "strings" "syscall" "time" + "github.com/karrick/godirwalk" "golang.org/x/sys/unix" ) -func DirSizeOnDisk(dirPath string, withNonDirPrefix bool) (uint64, error) { - // BSD implementation of du uses -A option for apparent size and -c to show a total - cmd := exec.Command("du", "-Ac", dirPath) - // Output block size with -A option will be 512 - return executeDU(cmd, dirPath, withNonDirPrefix, 512) +func DirSizeOnDisk(originalDirPath string, withNonDirPrefix bool) (size uint64, err error) { + dirPath := originalDirPath + if withNonDirPrefix { + dirPath, _ = filepath.Split(originalDirPath) + } + err = godirwalk.Walk(dirPath, &godirwalk.Options{Callback: func(osPathname string, entry *godirwalk.Dirent) error { + if !entry.IsDir() && !entry.IsRegular() { + return nil + } + // If prefix is set we should skip all the names that do not have the prefix. 
+ if withNonDirPrefix && !strings.HasPrefix(osPathname, originalDirPath) { + return nil + } + stat, err := os.Lstat(osPathname) + if err != nil { + return err + } + size += uint64(stat.Size()) + return nil + }}) + return } func GetFSStats(path string) (blocks, bavail uint64, bsize int64, err error) { diff --git a/ios/fsutils_linux.go b/ios/fsutils_linux.go index 63dcbf5a2f..79c3f3f867 100644 --- a/ios/fsutils_linux.go +++ b/ios/fsutils_linux.go @@ -1,23 +1,137 @@ // Package ios is a collection of interfaces to the local storage subsystem; // the package includes OS-dependent implementations for those interfaces. /* - * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. */ package ios import ( + "errors" + "fmt" "os" - "os/exec" + "path/filepath" "syscall" "time" + "unsafe" + "github.com/NVIDIA/aistore/cmn/cos" "golang.org/x/sys/unix" ) -func DirSizeOnDisk(dirPath string, withNonDirPrefix bool) (uint64, error) { - // GNU implementation of du uses -b to get apparent size with a block size of 1 and -c to show a total - cmd := exec.Command("du", "-bc", dirPath) - return executeDU(cmd, dirPath, withNonDirPrefix, 1) +const maxStackSize = 65536 +const dirOpenMode = syscall.O_RDONLY | syscall.O_NOCTTY | syscall.O_NONBLOCK | syscall.O_NOFOLLOW | syscall.O_CLOEXEC | syscall.O_DIRECTORY + +// DirSizeOnDisk calculates the total size of a directory on disk, including its subdirectories. +// TODO: We should also calculate and include `xattr` size. 
+func DirSizeOnDisk(dirPath string, withNonDirPrefix bool) (size uint64, err error) {
+	var prefix string
+	if withNonDirPrefix {
+		dirPath, prefix = filepath.Split(dirPath)
+	}
+
+	fd, err := syscall.Open(dirPath, dirOpenMode, 0)
+	if err != nil {
+		return size, err
+	}
+	defer syscall.Close(fd)
+
+	var stat syscall.Stat_t
+	if err := syscall.Fstat(fd, &stat); err != nil {
+		return size, err
+	}
+
+	size, err = dirSizeOnDiskFD(fd, prefix, 0)
+	return size + uint64(stat.Size), err
+}
+
+// dirSizeOnDiskFD calculates directory size on disk based on the opened
+// file descriptor to said directory.
+func dirSizeOnDiskFD(fd int, prefix string, stackSize int) (size uint64, err error) {
+	if stackSize >= maxStackSize {
+		return size, fmt.Errorf("DirSizeOnDisk stack overflow, exceeded maximum size of %d nested directories", maxStackSize)
+	}
+
+	buf := make([]byte, 16*cos.KiB)
+	for {
+		n, err := syscall.ReadDirent(fd, buf)
+		if err != nil {
+			// syscall.EINTR - interrupted by signal.
+			if errors.Is(err, syscall.EINTR) {
+				continue
+			}
+			// syscall.EINVAL - can occur when reading protected directory.
+			if errors.Is(err, syscall.EINVAL) {
+				return size, nil
+			}
+			return size, err
+		}
+		if n <= 0 { // end of directory: normal exit
+			return size, nil
+		}
+		workBuffer := buf[:n] // trim work buffer to number of bytes read
+
+		for len(workBuffer) > 0 {
+			var sde syscall.Dirent
+			copy((*[unsafe.Sizeof(syscall.Dirent{})]byte)(unsafe.Pointer(&sde))[:], workBuffer)
+			workBuffer = workBuffer[sde.Reclen:] // Advance buffer for next iteration through loop.
+
+			// Skip `.` and `..` dirents as well as `inode == 0` (inode marked for deletion).
+			if sde.Ino == 0 || nameEqual(&sde.Name, "") || nameEqual(&sde.Name, ".") || nameEqual(&sde.Name, "..") {
+				continue
+			}
+			// If prefix is set we should skip all the names that do not have the prefix.
+			if prefix != "" && !nameHasPrefix(&sde.Name, prefix) {
+				continue
+			}
+
+			// Skip anything except files and directories.
+			if sde.Type != syscall.DT_REG && sde.Type != syscall.DT_DIR {
+				continue
+			}
+
+			var stat syscall.Stat_t
+			// TODO: We might consider defining `FstatatBuf(...)` for different architectures:
+			// * `SYS_FSTATAT` - linux && arm64
+			_, _, errno := syscall.Syscall6(syscall.SYS_NEWFSTATAT, uintptr(fd), uintptr(unsafe.Pointer(&sde.Name[0])), uintptr(unsafe.Pointer(&stat)), uintptr(unix.AT_SYMLINK_NOFOLLOW), 0, 0)
+			if errno != 0 {
+				return size, errno
+			}
+			size += uint64(stat.Size)
+
+			if sde.Type == syscall.DT_DIR {
+				fd, _, errno := syscall.Syscall6(syscall.SYS_OPENAT, uintptr(fd), uintptr(unsafe.Pointer(&sde.Name[0])), uintptr(dirOpenMode), uintptr(0), 0, 0)
+				if errno != 0 {
+					// syscall.EACCES/EPERM - not permitted to open directory: skip it (test `errno`; the outer `err` is always nil here).
+					if errno == syscall.EACCES || errno == syscall.EPERM {
+						continue
+					}
+					return size, errno
+				}
+				n, err := dirSizeOnDiskFD(int(fd), "", stackSize+1)
+				_ = syscall.Close(int(fd))
+				if err != nil {
+					return size, err
+				}
+				size += n
+			}
+		}
+	}
+}
+
+func nameEqual(name *[256]int8, s string) bool {
+	return nameHasPrefix(name, s) && name[len(s)] == '\x00'
+}
+
+func nameHasPrefix(name *[256]int8, s string) bool {
+	if len(s) >= 255 { // Not 256 because we know that `name` has NULL character.
+		return false
+	}
+	for i := range len(s) {
+		if byte(name[i]) != s[i] {
+			return false
+		}
+	}
+	return true
+}
 
 func GetFSStats(path string) (blocks, bavail uint64, bsize int64, err error) {
diff --git a/ios/fsutils_test.go b/ios/fsutils_test.go
index ab14853d6a..92e138c3d9 100644
--- a/ios/fsutils_test.go
+++ b/ios/fsutils_test.go
@@ -1,64 +1,57 @@
 // Package ios is a collection of interfaces to the local storage subsystem;
 // the package includes OS-dependent implementations for those interfaces.
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*/ package ios_test import ( - "os" - "path" - "testing" - - "github.com/NVIDIA/aistore/cmn/cos" - "github.com/NVIDIA/aistore/cmn/mono" "github.com/NVIDIA/aistore/ios" + "github.com/NVIDIA/aistore/tools" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" ) -func TestGetFSUsedPercentage(t *testing.T) { - percentage, ok := ios.GetFSUsedPercentage("/") - if !ok { - t.Error("Unable to retrieve FS used percentage!") - } - if percentage > 100 { - t.Errorf("Invalid FS used percentage [%d].", percentage) - } -} +var _ = Describe("fsutils", func() { + Describe("GetFSUsedPercentage", func() { + It("should", func() { + percentage, ok := ios.GetFSUsedPercentage("/") + Expect(ok).To(BeTrue(), "Unable to retrieve FS used percentage!") + Expect(percentage).To(BeNumerically("<=", 100), "Invalid FS used percentage: %d", percentage) + }) + }) -func TestDirSize(t *testing.T) { - name, err := os.MkdirTemp("/tmp", t.Name()) - if err != nil { - t.Error(err) - return - } - defer os.RemoveAll(name) + Describe("DirSizeOnDisk", func() { + var ( + rootDir string + files []string + ) - size := mkFile(t, name, "file.txt") + BeforeEach(func() { + rootDir, files = tools.PrepareDirTree(GinkgoTB(), tools.DirTreeDesc{ + InitDir: "", + Dirs: 10, + Files: 10, + FileSize: 1024, + Depth: 5, + Empty: true, + }) + }) - totalSize, err := ios.DirSizeOnDisk(name, false /*withNonDirPrefix*/) - if err != nil { - t.Error(err) - } - if totalSize < uint64(size) { - t.Fatalf("Dir size %d < %d file", totalSize, size) - } -} + Describe("withoutPrefix", func() { + It("should calculate size correctly", func() { + size, err := ios.DirSizeOnDisk(rootDir, false /*withNonDirPrefix*/) + Expect(err).NotTo(HaveOccurred()) + Expect(size).To(BeNumerically(">", 50*1024)) + }) + }) -func mkFile(t *testing.T, dir, fname string) (written int) { - k := mono.NanoTime() & 0xff - f, err := os.Create(path.Join(dir, fname)) - if err != nil { - t.Error(err) - return - } - size := cos.KiB * int(k) - written, err = 
f.Write(make([]byte, size)) - f.Close() - if err != nil { - t.Error(err) - } - if written != size { - t.Fatalf("written %d != %d", size, written) - } - return -} + Describe("withPrefix", func() { + It("should calculate size correctly", func() { + size, err := ios.DirSizeOnDisk(files[0], true /*withNonDirPrefix*/) + Expect(err).NotTo(HaveOccurred()) + Expect(size).To(BeNumerically(">=", 1024)) + }) + }) + }) +}) diff --git a/ios/fsutils_unix.go b/ios/fsutils_unix.go index d9d5a0a78d..d649e5b766 100644 --- a/ios/fsutils_unix.go +++ b/ios/fsutils_unix.go @@ -6,12 +6,6 @@ package ios import ( - "fmt" - "os/exec" - "strconv" - "strings" - - "github.com/NVIDIA/aistore/cmn/debug" "github.com/NVIDIA/aistore/cmn/nlog" "golang.org/x/sys/unix" ) @@ -22,46 +16,3 @@ func getFSStats(path string) (fsStats unix.Statfs_t, err error) { } return } - -// - on-disk size is sometimes referred to as "apparent size" -// - `withNonDirPrefix` is allowed to match nothing -// - TODO: carefully differentiate FATAL err-s: access perm-s, invalid command-line, executable missing -func executeDU(cmd *exec.Cmd, dirPath string, withNonDirPrefix bool, outputBlockSize uint64) (uint64, error) { - out, err := cmd.CombinedOutput() - if err != nil { - switch { - case len(out) == 0: - return 0, fmt.Errorf("du %s: combined output empty, err: %v", dirPath, err) - default: - return 0, fmt.Errorf("failed to du %s: %v (%s)", dirPath, err, string(out)) - } - } - - lines := strings.Split(string(out), "\n") // on Windows, use instead strings.FieldsFunc('\n' and '\r'), here and elsewhere - if n := len(lines); n > 8 { - lines = lines[n-8:] - } - // e.g.: "12345 total" - for i := len(lines) - 1; i >= 0; i-- { - s := lines[i] - if strings.HasSuffix(s, "total") && s[0] > '0' && s[0] <= '9' { - return uint64(_parseTotal(s)) * outputBlockSize, nil - } - } - if !withNonDirPrefix { - err = fmt.Errorf("failed to parse 'du %s': ...%v", dirPath, lines) - } - return 0, err -} - -func _parseTotal(s string) (size int64) { - var 
err error - for i := range len(s) { - if s[i] < '0' || s[i] > '9' { - size, err = strconv.ParseInt(s[:i], 10, 64) - debug.AssertNoErr(err) - break - } - } - return -} diff --git a/ios/ios_suite_test.go b/ios/ios_suite_test.go new file mode 100644 index 0000000000..59dac9bb25 --- /dev/null +++ b/ios/ios_suite_test.go @@ -0,0 +1,18 @@ +// Package ios is a collection of interfaces to the local storage subsystem; +// the package includes OS-dependent implementations for those interfaces. +/* + * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. + */ +package ios_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestIOS(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, t.Name()) +} diff --git a/tools/file.go b/tools/file.go index ec2c59ef02..e9f61e0792 100644 --- a/tools/file.go +++ b/tools/file.go @@ -28,11 +28,12 @@ import ( type ( DirTreeDesc struct { - InitDir string // Directory where the tree is created (can be empty). - Dirs int // Number of (initially empty) directories at each depth (we recurse into single directory at each depth). - Files int // Number of files at each depth. - Depth int // Depth of tree/nesting. - Empty bool // Determines if there is a file somewhere in the directories. + InitDir string // Directory where the tree is created (can be empty). + Dirs int // Number of (initially empty) directories at each depth (we recurse into single directory at each depth). + Files int // Number of files at each depth. + FileSize int64 // Size of each file. + Depth int // Depth of tree/nesting. + Empty bool // Determines if there is a file somewhere in the directories. 
} ContentTypeDesc struct { @@ -106,8 +107,12 @@ func PrepareDirTree(tb testing.TB, desc DirTreeDesc) (string, []string) { for i := 1; i <= desc.Files; i++ { f, err := os.CreateTemp(nestedDirectoryName, "") tassert.CheckFatal(tb, err) + if desc.FileSize > 0 { + io.Copy(f, io.LimitReader(cryptorand.Reader, desc.FileSize)) + } fileNames = append(fileNames, f.Name()) - f.Close() + err = f.Close() + tassert.CheckFatal(tb, err) } sort.Strings(names) if desc.Dirs > 0 { @@ -119,8 +124,12 @@ func PrepareDirTree(tb testing.TB, desc DirTreeDesc) (string, []string) { if !desc.Empty { f, err := os.CreateTemp(nestedDirectoryName, "") tassert.CheckFatal(tb, err) + if desc.FileSize > 0 { + io.Copy(f, io.LimitReader(cryptorand.Reader, desc.FileSize)) + } fileNames = append(fileNames, f.Name()) - f.Close() + err = f.Close() + tassert.CheckFatal(tb, err) } return topDirName, fileNames }