From 64759ee49b141858df0e90bdf3a55d053134ebda Mon Sep 17 00:00:00 2001 From: Alessandro Patti Date: Wed, 7 Aug 2024 13:34:42 +0200 Subject: [PATCH] Lazy expand absolute path to avoid including them in the output (#4009) **What type of PR is this?** Bug fix **What does this PR do? Why is it needed?** It fixes reproducibility issues reported in https://github.com/bazelbuild/rules_go/issues/3994 by lazy-expanding paths to absolute paths **Which issues(s) does this PR fix?** Fixes #3994 --- go/tools/builders/BUILD.bazel | 1 + go/tools/builders/builder.go | 2 + go/tools/builders/cc.go | 47 +++++++ go/tools/builders/env.go | 39 +++++- go/tools/builders/stdlib.go | 4 +- go/tools/builders/stdliblist.go | 4 +- .../reproducibility/reproducibility_test.go | 131 ++++++++++++------ 7 files changed, 179 insertions(+), 49 deletions(-) create mode 100644 go/tools/builders/cc.go diff --git a/go/tools/builders/BUILD.bazel b/go/tools/builders/BUILD.bazel index 244513000a..631dac1590 100644 --- a/go/tools/builders/BUILD.bazel +++ b/go/tools/builders/BUILD.bazel @@ -63,6 +63,7 @@ filegroup( "ar.go", "asm.go", "builder.go", + "cc.go", "cgo2.go", "compilepkg.go", "constants.go", diff --git a/go/tools/builders/builder.go b/go/tools/builders/builder.go index f9b96b8b8e..fd0a5ba6af 100644 --- a/go/tools/builders/builder.go +++ b/go/tools/builders/builder.go @@ -54,6 +54,8 @@ func main() { action = stdlib case "stdliblist": action = stdliblist + case "cc": + action = cc default: log.Fatalf("unknown action: %s", verb) } diff --git a/go/tools/builders/cc.go b/go/tools/builders/cc.go new file mode 100644 index 0000000000..90f9258b9c --- /dev/null +++ b/go/tools/builders/cc.go @@ -0,0 +1,47 @@ +package main + +import ( + "errors" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "syscall" +) + +func cc(args []string) error { + cc := os.Getenv("GO_CC") + if cc == "" { + errors.New("GO_CC environment variable not set") + } + ccroot := os.Getenv("GO_CC_ROOT") + if ccroot == "" { + errors.New("GO_CC_ROOT environment variable not set") + } + normalized := []string{cc} + normalized = append(normalized, args...) + transformArgs(normalized, cgoAbsEnvFlags, func(s string) string { + if strings.HasPrefix(s, cgoAbsPlaceholder) { + trimmed := strings.TrimPrefix(s, cgoAbsPlaceholder) + abspath := filepath.Join(ccroot, trimmed) + if _, err := os.Stat(abspath); err == nil { + // Only return the abspath if it exists, otherwise it + // means that either it won't have any effect or the original + // value was not a relpath (e.g. a path with a XCODE placehold from + // macos cc_wrapper) + return abspath + } + return trimmed + } + return s + }) + if runtime.GOOS == "windows" { + cmd := exec.Command(normalized[0], normalized[1:]...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() + } else { + return syscall.Exec(normalized[0], normalized, os.Environ()) + } +} diff --git a/go/tools/builders/env.go b/go/tools/builders/env.go index cc8b37d721..bb01ac205f 100644 --- a/go/tools/builders/env.go +++ b/go/tools/builders/env.go @@ -35,6 +35,8 @@ var ( cgoEnvVars = []string{"CGO_CFLAGS", "CGO_CXXFLAGS", "CGO_CPPFLAGS", "CGO_LDFLAGS"} // cgoAbsEnvFlags are all the flags that need absolute path in cgoEnvVars cgoAbsEnvFlags = []string{"-I", "-L", "-isysroot", "-isystem", "-iquote", "-include", "-gcc-toolchain", "--sysroot", "-resource-dir", "-fsanitize-blacklist", "-fsanitize-ignorelist"} + // cgoAbsPlaceholder is placed in front of flag values that must be absolutized + cgoAbsPlaceholder = "__GO_BAZEL_CC_PLACEHOLDER__" ) // env holds a small amount of Go environment and toolchain information @@ -160,10 +162,33 @@ func (e *env) runCommandToFile(out, err io.Writer, args []string) error { return runAndLogCommand(cmd, e.verbose) } -func absEnv(envNameList []string, argList []string) error { +// absCCCompiler modifies CGO flags to workaround relative paths. +// Because go is having its own sandbox, all CGO flags should use +// absolute paths. However, CGO flags are embedded in the output +// so we cannot use absolute paths directly. Instead, use a placeholder +// for the absolute path and we replace CC with this builder so that +// we can expand the placeholder later. +func absCCCompiler(envNameList []string, argList []string) error { + err := os.Setenv("GO_CC", os.Getenv("CC")) + if err != nil { + return err + } + err = os.Setenv("GO_CC_ROOT", abs(".")) + if err != nil { + return err + } + err = os.Setenv("CC", abs(os.Args[0])+" cc") + if err != nil { + return err + } for _, envName := range envNameList { splitedEnv := strings.Fields(os.Getenv(envName)) - absArgs(splitedEnv, argList) + transformArgs(splitedEnv, argList, func(s string) string { + if filepath.IsAbs(s) { + return s + } + return cgoAbsPlaceholder + s + }) if err := os.Setenv(envName, strings.Join(splitedEnv, " ")); err != nil { return err } @@ -320,10 +345,16 @@ func abs(path string) string { // absArgs applies abs to strings that appear in args. Only paths that are // part of options named by flags are modified. func absArgs(args []string, flags []string) { + transformArgs(args, flags, abs) +} + +// transformArgs applies fn to strings that appear in args. Only paths that are +// part of options named by flags are modified. +func transformArgs(args []string, flags []string, fn func(string) string) { absNext := false for i := range args { if absNext { - args[i] = abs(args[i]) + args[i] = fn(args[i]) absNext = false continue } @@ -341,7 +372,7 @@ func absArgs(args []string, flags []string) { possibleValue = possibleValue[1:] separator = "=" } - args[i] = fmt.Sprintf("%s%s%s", f, separator, abs(possibleValue)) + args[i] = fmt.Sprintf("%s%s%s", f, separator, fn(possibleValue)) break } } diff --git a/go/tools/builders/stdlib.go b/go/tools/builders/stdlib.go index 15d52bb012..105ca5c635 100644 --- a/go/tools/builders/stdlib.go +++ b/go/tools/builders/stdlib.go @@ -158,9 +158,7 @@ You may need to use the flags --cpu=x64_windows --compiler=mingw-gcc.`) installArgs = append(installArgs, "-ldflags="+allSlug+strings.Join(ldflags, " ")) installArgs = append(installArgs, "-asmflags="+allSlug+strings.Join(asmflags, " ")) - // Modify CGO flags to use only absolute path - // because go is having its own sandbox, all CGO flags must use absolute path - if err := absEnv(cgoEnvVars, cgoAbsEnvFlags); err != nil { + if err := absCCCompiler(cgoEnvVars, cgoAbsEnvFlags); err != nil { return fmt.Errorf("error modifying cgo environment to absolute path: %v", err) } diff --git a/go/tools/builders/stdliblist.go b/go/tools/builders/stdliblist.go index 098be0408c..2648d2486d 100644 --- a/go/tools/builders/stdliblist.go +++ b/go/tools/builders/stdliblist.go @@ -257,9 +257,7 @@ func stdliblist(args []string) error { } os.Setenv("CC", quotePathIfNeeded(abs(ccEnv))) - // Modify CGO flags to use only absolute path - // because go is having its own sandbox, all CGO flags must use absolute path - if err := absEnv(cgoEnvVars, cgoAbsEnvFlags); err != nil { + if err := absCCCompiler(cgoEnvVars, cgoAbsEnvFlags); err != nil { return fmt.Errorf("error modifying cgo environment to absolute path: %v", err) } diff --git a/tests/integration/reproducibility/reproducibility_test.go b/tests/integration/reproducibility/reproducibility_test.go index 2e9bc59144..0feeee4714 100644 --- a/tests/integration/reproducibility/reproducibility_test.go +++ b/tests/integration/reproducibility/reproducibility_test.go @@ -189,6 +189,8 @@ func Test(t *testing.T) { defer wg.Done() cmd := bazel_testing.BazelCmd("build", "//:all", + "--copt=-Irelative/path/does/not/exist", + "--linkopt=-Lrelative/path/does/not/exist", "@io_bazel_rules_go//go/tools/builders:go_path", "@go_sdk//:builder", ) @@ -200,49 +202,92 @@ func Test(t *testing.T) { } wg.Wait() - // Hash files in each bazel-bin directory. - dirHashes := make([][]fileHash, len(dirs)) - errs := make([]error, len(dirs)) - wg.Add(len(dirs)) - for i := range dirs { - go func(i int) { - defer wg.Done() - dirHashes[i], errs[i] = hashFiles(filepath.Join(dirs[i], "bazel-bin")) - }(i) - } - wg.Wait() - for _, err := range errs { - if err != nil { + t.Run("Check Hashes", func(t *testing.T) { + // Hash files in each bazel-bin directory. + dirHashes := make([][]fileHash, len(dirs)) + errs := make([]error, len(dirs)) + wg.Add(len(dirs)) + for i := range dirs { + go func(i int) { + defer wg.Done() + dirHashes[i], errs[i] = hashFiles(filepath.Join(dirs[i], "bazel-out/"), func(root, path string) bool { + // Hash everything but actions stdout/stderr and the volatile-status.txt file + // as they are expected to change + return strings.HasPrefix(path, filepath.Join(root, "_tmp")) || + path == filepath.Join(root, "volatile-status.txt") + }) + }(i) + } + wg.Wait() + for _, err := range errs { + if err != nil { + t.Fatal(err) + } + } + + // Compare dir0 and dir1. They should be identical. + if err := compareHashes(dirHashes[0], dirHashes[1]); err != nil { t.Fatal(err) } - } - // Compare dir0 and dir1. They should be identical. - if err := compareHashes(dirHashes[0], dirHashes[1]); err != nil { - t.Fatal(err) - } + // Compare dir0 and dir2. They should be different. + if err := compareHashes(dirHashes[0], dirHashes[2]); err == nil { + t.Fatalf("dir0 and dir2 are the same)", len(dirHashes[0])) + } + }) - // Compare dir0 and dir2. They should be different. - if err := compareHashes(dirHashes[0], dirHashes[2]); err == nil { - t.Fatalf("dir0 and dir2 are the same)", len(dirHashes[0])) - } + t.Run("Check builder", func(t *testing.T) { + // Check that the go_sdk path doesn't appear in the builder binary. This path is different + // nominally different per workspace (but in these tests, the go_sdk paths are all set to the same + // path in WORKSPACE) -- so if this path is in the builder binary, then builds between workspaces + // would be partially non cacheable. + builder_file, err := os.Open(filepath.Join(dirs[0], "bazel-bin", "external", "go_sdk", "builder")) + if err != nil { + t.Fatal(err) + } + defer builder_file.Close() + builder_data, err := ioutil.ReadAll(builder_file) + if err != nil { + t.Fatal(err) + } + if bytes.Index(builder_data, []byte("go_sdk")) != -1 { + t.Fatalf("Found go_sdk path in builder binary, builder tool won't be reproducible") + } + }) - // Check that the go_sdk path doesn't appear in the builder binary. This path is different - // nominally different per workspace (but in these tests, the go_sdk paths are all set to the same - // path in WORKSPACE) -- so if this path is in the builder binary, then builds between workspaces - // would be partially non cacheable. - builder_file, err := os.Open(filepath.Join(dirs[0], "bazel-bin", "external", "go_sdk", "builder")) - if err != nil { - t.Fatal(err) - } - defer builder_file.Close() - builder_data, err := ioutil.ReadAll(builder_file) - if err != nil { - t.Fatal(err) - } - if bytes.Index(builder_data, []byte("go_sdk")) != -1 { - t.Fatalf("Found go_sdk path in builder binary, builder tool won't be reproducible") - } + t.Run("Check stdlib", func(t *testing.T) { + for _, dir := range dirs { + found := false + root, err := os.Readlink(filepath.Join(dir, "bazel-out/")) + if err != nil { + t.Fatal(err) + } + + filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !strings.Contains(path, "stdlib_/pkg") { + return nil + } + if !strings.HasSuffix(path, ".a") { + return nil + } + data, err := ioutil.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if bytes.Index(data, []byte("__GO_BAZEL_CC_PLACEHOLDER__")) == -1 { + found = true + return filepath.SkipAll + } + return nil + }) + if !found { + t.Fatal("Placeholder not found in stdlib of " + dir) + } + } + }) } func copyTree(dstRoot, srcRoot string) error { @@ -311,7 +356,7 @@ type fileHash struct { rel, hash string } -func hashFiles(dir string) ([]fileHash, error) { +func hashFiles(dir string, exclude func(root, path string) bool) ([]fileHash, error) { // Follow top-level symbolic link root := dir for { @@ -348,7 +393,15 @@ func hashFiles(dir string) ([]fileHash, error) { return err } } + if info.IsDir() { + if exclude(root, path) { + return filepath.SkipDir + } + return nil + } + + if exclude(root, path) { return nil }