-
Notifications
You must be signed in to change notification settings - Fork 17.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
internal,os: employ copy_file_range(2) for file-to-file copying on FreeBSD
FreeBSD 13.0 introduced the Linux-compatible copy_file_range(2) system call; we should make use of it. Ref: https://www.gnu.org/software/gnulib/manual/html_node/copy_005ffile_005frange.html https://reviews.freebsd.org/D20584?id=60021 https://man.freebsd.org/cgi/man.cgi?copy_file_range(2) Change-Id: I75edb5629717289c8887be436613d3a8b3820bdc Reviewed-on: https://go-review.googlesource.com/c/go/+/604655 Run-TryBot: Andy Pan <[email protected]> Reviewed-by: Carlos Amedee <[email protected]> TryBot-Result: Gopher Robot <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]> Auto-Submit: Ian Lance Taylor <[email protected]>
- Loading branch information
Showing
14 changed files
with
389 additions
and
102 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package poll | ||
|
||
import ( | ||
"internal/syscall/unix" | ||
"syscall" | ||
) | ||
|
||
func supportCopyFileRange() bool { | ||
return unix.SupportCopyFileRange() | ||
} | ||
|
||
// maxCopyFileRangeRound caps the length requested from a single | ||
// copy_file_range(2) call. | ||
// | ||
// For best performance, call copy_file_range() with the largest len value | ||
// possible. It is interruptible on most file systems, so there is no penalty | ||
// for using very large len values, even SSIZE_MAX. | ||
// | ||
// NOTE(review): the cap is 1<<31 - 1 rather than SSIZE_MAX, presumably so | ||
// that the value still fits in an int on 32-bit platforms; confirm. | ||
const maxCopyFileRangeRound = 1<<31 - 1 | ||
|
||
// handleCopyFileRangeErr interprets the outcome of one copy_file_range(2)
// round on FreeBSD. It reports whether the copy should be regarded as
// handled by copy_file_range(2), together with the error to surface.
// A false result with a nil error tells the caller to fall back to a more
// generic copy. The copied and written arguments are not consulted here.
func handleCopyFileRangeErr(err error, copied, written int64) (bool, error) {
	if err == syscall.ENOSYS {
		// The copy_file_range(2) function first appeared in FreeBSD 13.0.
		// Go supports FreeBSD >= 12, so the system call may not be
		// present. CopyFileRange already consults supportCopyFileRange
		// before attempting the copy, but ENOSYS is still checked here to
		// guard against rare cases like https://go.dev/issue/58592
		//
		// ENOSYS means that certainly no data has been transferred, so
		// report the transfer as unhandled and let the caller fall back
		// to more generic code.
		return false, nil
	}

	if err == syscall.EFBIG || err == syscall.EINVAL || err == syscall.EIO {
		// EFBIG: the copy exceeds the process's file size limit or the
		// maximum file size of the file system dst resides on; leave it
		// to the generic copy.
		//
		// EINVAL can have a few causes:
		//  1. Either dst or src refers to a file object that is not a
		//     regular file, for instance, a pipe.
		//  2. src and dst refer to the same file and the byte ranges
		//     overlap.
		//  3. The flags argument is not 0.
		// None of these cases should be considered handled by
		// copy_file_range(2) because there is no data transfer, so just
		// fall back to the generic copy.
		//
		// EIO is treated the same way.
		return false, nil
	}

	// Success and every other error count as handled; the error, if any,
	// is passed through unchanged.
	return true, err
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,10 @@ import ( | |
"syscall" | ||
) | ||
|
||
func supportCopyFileRange() bool { | ||
return isKernelVersionGE53() | ||
} | ||
|
||
var isKernelVersionGE53 = sync.OnceValue(func() bool { | ||
major, minor := unix.KernelVersion() | ||
// copy_file_range(2) is broken in various ways on kernels older than 5.3, | ||
|
@@ -20,102 +24,54 @@ var isKernelVersionGE53 = sync.OnceValue(func() bool { | |
|
||
const maxCopyFileRangeRound = 1 << 30 | ||
|
||
// CopyFileRange copies at most remain bytes of data from src to dst, using | ||
// the copy_file_range system call. dst and src must refer to regular files. | ||
func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) { | ||
if !isKernelVersionGE53() { | ||
return 0, false, nil | ||
} | ||
|
||
for remain > 0 { | ||
max := remain | ||
if max > maxCopyFileRangeRound { | ||
max = maxCopyFileRangeRound | ||
} | ||
n, err := copyFileRange(dst, src, int(max)) | ||
switch err { | ||
case syscall.ENOSYS: | ||
// copy_file_range(2) was introduced in Linux 4.5. | ||
// Go supports Linux >= 2.6.33, so the system call | ||
// may not be present. | ||
// | ||
// If we see ENOSYS, we have certainly not transferred | ||
// any data, so we can tell the caller that we | ||
// couldn't handle the transfer and let them fall | ||
// back to more generic code. | ||
return 0, false, nil | ||
case syscall.EXDEV, syscall.EINVAL, syscall.EIO, syscall.EOPNOTSUPP, syscall.EPERM: | ||
// Prior to Linux 5.3, it was not possible to | ||
// copy_file_range across file systems. Similarly to | ||
// the ENOSYS case above, if we see EXDEV, we have | ||
// not transferred any data, and we can let the caller | ||
// fall back to generic code. | ||
// | ||
// As for EINVAL, that is what we see if, for example, | ||
// dst or src refer to a pipe rather than a regular | ||
// file. This is another case where no data has been | ||
// transferred, so we consider it unhandled. | ||
// | ||
// If src and dst are on CIFS, we can see EIO. | ||
// See issue #42334. | ||
// | ||
// If the file is on NFS, we can see EOPNOTSUPP. | ||
// See issue #40731. | ||
// | ||
// If the process is running inside a Docker container, | ||
// we might see EPERM instead of ENOSYS. See issue | ||
// #40893. Since EPERM might also be a legitimate error, | ||
// don't mark copy_file_range(2) as unsupported. | ||
return 0, false, nil | ||
case nil: | ||
if n == 0 { | ||
// If we did not read any bytes at all, | ||
// then this file may be in a file system | ||
// where copy_file_range silently fails. | ||
// https://lore.kernel.org/linux-fsdevel/[email protected]/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0 | ||
if written == 0 { | ||
return 0, false, nil | ||
} | ||
// Otherwise src is at EOF, which means | ||
// we are done. | ||
return written, true, nil | ||
func handleCopyFileRangeErr(err error, copied, written int64) (bool, error) { | ||
switch err { | ||
case syscall.ENOSYS: | ||
// copy_file_range(2) was introduced in Linux 4.5. | ||
// Go supports Linux >= 2.6.33, so the system call | ||
// may not be present. | ||
// | ||
// If we see ENOSYS, we have certainly not transferred | ||
// any data, so we can tell the caller that we | ||
// couldn't handle the transfer and let them fall | ||
// back to more generic code. | ||
return false, nil | ||
case syscall.EXDEV, syscall.EINVAL, syscall.EIO, syscall.EOPNOTSUPP, syscall.EPERM: | ||
// Prior to Linux 5.3, it was not possible to | ||
// copy_file_range across file systems. Similarly to | ||
// the ENOSYS case above, if we see EXDEV, we have | ||
// not transferred any data, and we can let the caller | ||
// fall back to generic code. | ||
// | ||
// As for EINVAL, that is what we see if, for example, | ||
// dst or src refer to a pipe rather than a regular | ||
// file. This is another case where no data has been | ||
// transferred, so we consider it unhandled. | ||
// | ||
// If src and dst are on CIFS, we can see EIO. | ||
// See issue #42334. | ||
// | ||
// If the file is on NFS, we can see EOPNOTSUPP. | ||
// See issue #40731. | ||
// | ||
// If the process is running inside a Docker container, | ||
// we might see EPERM instead of ENOSYS. See issue | ||
// #40893. Since EPERM might also be a legitimate error, | ||
// don't mark copy_file_range(2) as unsupported. | ||
return false, nil | ||
case nil: | ||
if copied == 0 { | ||
// If we did not read any bytes at all, | ||
// then this file may be in a file system | ||
// where copy_file_range silently fails. | ||
// https://lore.kernel.org/linux-fsdevel/[email protected]/T/#m05753578c7f7882f6e9ffe01f981bc223edef2b0 | ||
if written == 0 { | ||
return false, nil | ||
} | ||
remain -= n | ||
written += n | ||
default: | ||
return written, true, err | ||
} | ||
} | ||
return written, true, nil | ||
} | ||
|
||
// copyFileRange performs one round of copy_file_range(2). | ||
func copyFileRange(dst, src *FD, max int) (written int64, err error) { | ||
// The signature of copy_file_range(2) is: | ||
// | ||
// ssize_t copy_file_range(int fd_in, loff_t *off_in, | ||
// int fd_out, loff_t *off_out, | ||
// size_t len, unsigned int flags); | ||
// | ||
// Note that in the call to unix.CopyFileRange below, we use nil | ||
// values for off_in and off_out. For the system call, this means | ||
// "use and update the file offsets". That is why we must acquire | ||
// locks for both file descriptors (and why this whole machinery is | ||
// in the internal/poll package to begin with). | ||
if err := dst.writeLock(); err != nil { | ||
return 0, err | ||
} | ||
defer dst.writeUnlock() | ||
if err := src.readLock(); err != nil { | ||
return 0, err | ||
} | ||
defer src.readUnlock() | ||
var n int | ||
for { | ||
n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0) | ||
if err != syscall.EINTR { | ||
break | ||
// Otherwise src is at EOF, which means | ||
// we are done. | ||
} | ||
} | ||
return int64(n), err | ||
return true, err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
//go:build freebsd || linux | ||
|
||
package poll | ||
|
||
import ( | ||
"internal/syscall/unix" | ||
"syscall" | ||
) | ||
|
||
// CopyFileRange copies at most remain bytes of data from src to dst, using | ||
// the copy_file_range system call. dst and src must refer to regular files. | ||
func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) { | ||
if !supportCopyFileRange() { | ||
return 0, false, nil | ||
} | ||
|
||
for remain > 0 { | ||
max := remain | ||
if max > maxCopyFileRangeRound { | ||
max = maxCopyFileRangeRound | ||
} | ||
n, e := copyFileRange(dst, src, int(max)) | ||
if e == nil { | ||
remain -= n | ||
written += n | ||
} | ||
handled, err = handleCopyFileRangeErr(e, n, written) | ||
if n == 0 || !handled || err != nil { | ||
return | ||
} | ||
} | ||
|
||
return written, true, nil | ||
} | ||
|
||
// copyFileRange performs one round of copy_file_range(2). | ||
func copyFileRange(dst, src *FD, max int) (written int64, err error) { | ||
// For Linux, the signature of copy_file_range(2) is: | ||
// | ||
// ssize_t copy_file_range(int fd_in, loff_t *off_in, | ||
// int fd_out, loff_t *off_out, | ||
// size_t len, unsigned int flags); | ||
// | ||
// For FreeBSD, the signature of copy_file_range(2) is: | ||
// | ||
// ssize_t | ||
// copy_file_range(int infd, off_t *inoffp, int outfd, off_t *outoffp, | ||
// size_t len, unsigned int flags); | ||
// | ||
// Note that in the call to unix.CopyFileRange below, we use nil | ||
// values for off_in/off_out and inoffp/outoffp, which means "the file | ||
// offset for infd(fd_in) or outfd(fd_out) respectively will be used and | ||
// updated by the number of bytes copied". | ||
// | ||
// That is why we must acquire locks for both file descriptors (and why | ||
// this whole machinery is in the internal/poll package to begin with). | ||
if err := dst.writeLock(); err != nil { | ||
return 0, err | ||
} | ||
defer dst.writeUnlock() | ||
if err := src.readLock(); err != nil { | ||
return 0, err | ||
} | ||
defer src.readUnlock() | ||
var n int | ||
for { | ||
n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0) | ||
if err != syscall.EINTR { | ||
break | ||
} | ||
} | ||
return int64(n), err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package unix | ||
|
||
import ( | ||
"sync" | ||
"syscall" | ||
) | ||
|
||
// KernelVersion returns major and minor kernel version numbers | ||
// parsed from the syscall.Sysctl("kern.osrelease")'s value, | ||
// or (0, 0) if the version can't be obtained or parsed. | ||
func KernelVersion() (major, minor int) { | ||
release, err := syscall.Sysctl("kern.osrelease") | ||
if err != nil { | ||
return 0, 0 | ||
} | ||
|
||
parseNext := func() (n int) { | ||
for i, c := range release { | ||
if c == '.' { | ||
release = release[i+1:] | ||
return | ||
} | ||
if '0' <= c && c <= '9' { | ||
n = n*10 + int(c-'0') | ||
} | ||
} | ||
release = "" | ||
return | ||
} | ||
|
||
major = parseNext() | ||
minor = parseNext() | ||
|
||
return | ||
} | ||
|
||
// SupportCopyFileRange reports whether the kernel supports the copy_file_range(2). | ||
// This function will examine both the kernel version and the availability of the system call. | ||
var SupportCopyFileRange = sync.OnceValue(func() bool { | ||
// The copy_file_range() function first appeared in FreeBSD 13.0. | ||
major, _ := KernelVersion() | ||
_, err := CopyFileRange(0, nil, 0, nil, 0, 0) | ||
return major >= 13 && err != syscall.ENOSYS | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// Copyright 2024 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package unix_test | ||
|
||
import ( | ||
"internal/syscall/unix" | ||
"syscall" | ||
"testing" | ||
) | ||
|
||
func TestSupportCopyFileRange(t *testing.T) { | ||
major, minor := unix.KernelVersion() | ||
t.Logf("Running on FreeBSD %d.%d\n", major, minor) | ||
|
||
_, err := unix.CopyFileRange(0, nil, 0, nil, 0, 0) | ||
want := err != syscall.ENOSYS | ||
got := unix.SupportCopyFileRange() | ||
if want != got { | ||
t.Fatalf("SupportCopyFileRange, got %t; want %t", got, want) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.