Skip to content

Commit

Permalink
Faster dependency outputs (#1533)
Browse files Browse the repository at this point in the history
* Refactor out TerraformSource struct and functions so we can use it in config package without circular dependency

* [skip ci] wip

* Add regression testing for dependency optimization

* Update cli/tfsource/types.go

Co-authored-by: Yevgeniy Brikman <[email protected]>

* Add comment indicating what would happen if the ref changes

Co-authored-by: Yevgeniy Brikman <[email protected]>
  • Loading branch information
yorinasub17 and brikis98 authored Feb 10, 2021
1 parent 21df930 commit 4e47697
Show file tree
Hide file tree
Showing 10 changed files with 445 additions and 313 deletions.
15 changes: 8 additions & 7 deletions cli/cli_app.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ import (
"encoding/json"
"fmt"
"io"

"regexp"
"strings"
"time"

"github.com/mattn/go-zglob"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"

"github.com/gruntwork-io/terragrunt/aws_helper"
"github.com/gruntwork-io/terragrunt/cli/tfsource"
"github.com/gruntwork-io/terragrunt/codegen"
"github.com/gruntwork-io/terragrunt/config"
"github.com/gruntwork-io/terragrunt/configstack"
Expand All @@ -18,9 +22,6 @@ import (
"github.com/gruntwork-io/terragrunt/remote"
"github.com/gruntwork-io/terragrunt/shell"
"github.com/gruntwork-io/terragrunt/util"
"github.com/mattn/go-zglob"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
)

const OPT_TERRAGRUNT_CONFIG = "terragrunt-config"
Expand Down Expand Up @@ -386,7 +387,7 @@ func RunTerragrunt(terragruntOptions *options.TerragruntOptions) error {
}

updatedTerragruntOptions := terragruntOptions
if sourceUrl := getTerraformSourceUrl(terragruntOptions, terragruntConfig); sourceUrl != "" {
if sourceUrl := config.GetTerraformSourceUrl(terragruntOptions, terragruntConfig); sourceUrl != "" {
updatedTerragruntOptions, err = downloadTerraformSource(sourceUrl, terragruntOptions, terragruntConfig)
if err != nil {
return err
Expand Down Expand Up @@ -851,7 +852,7 @@ func providersNeedInit(terragruntOptions *options.TerragruntOptions) bool {
//
// This method takes in the "original" terragrunt options which has the unmodified 'WorkingDir' from before downloading the code from the source URL,
// and the "updated" terragrunt options that will contain the updated 'WorkingDir' into which the code has been downloaded
func runTerraformInit(originalTerragruntOptions *options.TerragruntOptions, terragruntOptions *options.TerragruntOptions, terragruntConfig *config.TerragruntConfig, terraformSource *TerraformSource) error {
func runTerraformInit(originalTerragruntOptions *options.TerragruntOptions, terragruntOptions *options.TerragruntOptions, terragruntConfig *config.TerragruntConfig, terraformSource *tfsource.TerraformSource) error {

// Prevent Auto-Init if the user has disabled it
if util.FirstArg(terragruntOptions.TerraformCliArgs) != CMD_INIT && !terragruntOptions.AutoInit {
Expand All @@ -867,7 +868,7 @@ func runTerraformInit(originalTerragruntOptions *options.TerragruntOptions, terr
return runTerragruntWithConfig(originalTerragruntOptions, initOptions, terragruntConfig, terraformSource != nil)
}

func prepareInitOptions(terragruntOptions *options.TerragruntOptions, terraformSource *TerraformSource) (*options.TerragruntOptions, error) {
func prepareInitOptions(terragruntOptions *options.TerragruntOptions, terraformSource *tfsource.TerraformSource) (*options.TerragruntOptions, error) {
// Need to clone the terragruntOptions, so the TerraformCliArgs can be configured to run the init command
initOptions := terragruntOptions.Clone(terragruntOptions.TerragruntConfigPath)
initOptions.TerraformCliArgs = []string{CMD_INIT}
Expand Down
240 changes: 12 additions & 228 deletions cli/download_source.go

Large diffs are not rendered by default.

58 changes: 6 additions & 52 deletions cli/download_source_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/gruntwork-io/terragrunt/cli/tfsource"
"github.com/gruntwork-io/terragrunt/config"
"github.com/gruntwork-io/terragrunt/options"
"github.com/gruntwork-io/terragrunt/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestAlreadyHaveLatestCodeLocalFilePath(t *testing.T) {
Expand Down Expand Up @@ -223,56 +225,8 @@ func TestDownloadTerraformSourceFromLocalFolderWithManifest(t *testing.T) {

}

// TestSplitSourceUrl is a table-driven test for splitSourceUrl. Each case supplies a source URL and the root repo and
// module path it is expected to be split into: sources whose path contains a double-slash (//) are expected to split
// around it, while sources without one are expected to come back unchanged with an empty module path. The cases cover
// absolute local paths, relative local paths, and ssh:// URLs.
func TestSplitSourceUrl(t *testing.T) {
t.Parallel()

// Each entry: case name, input source URL, expected root repo (rendered as a string), expected module path.
testCases := []struct {
name string
sourceUrl string
expectedRootRepo string
expectedModulePath string
}{
{"root-path-only-no-double-slash", "/foo", "/foo", ""},
{"parent-path-one-child-no-double-slash", "/foo/bar", "/foo/bar", ""},
{"parent-path-multiple-children-no-double-slash", "/foo/bar/baz/blah", "/foo/bar/baz/blah", ""},
{"relative-path-no-children-no-double-slash", "../foo", "../foo", ""},
{"relative-path-one-child-no-double-slash", "../foo/bar", "../foo/bar", ""},
{"relative-path-multiple-children-no-double-slash", "../foo/bar/baz/blah", "../foo/bar/baz/blah", ""},
{"root-path-only-with-double-slash", "/foo//", "/foo", ""},
{"parent-path-one-child-with-double-slash", "/foo//bar", "/foo", "bar"},
{"parent-path-multiple-children-with-double-slash", "/foo/bar//baz/blah", "/foo/bar", "baz/blah"},
{"relative-path-no-children-with-double-slash", "..//foo", "..", "foo"},
{"relative-path-one-child-with-double-slash", "../foo//bar", "../foo", "bar"},
{"relative-path-multiple-children-with-double-slash", "../foo/bar//baz/blah", "../foo/bar", "baz/blah"},
{"parent-url-one-child-no-double-slash", "ssh://[email protected]/foo/modules.git/foo", "ssh://[email protected]/foo/modules.git/foo", ""},
{"parent-url-multiple-children-no-double-slash", "ssh://[email protected]/foo/modules.git/foo/bar/baz/blah", "ssh://[email protected]/foo/modules.git/foo/bar/baz/blah", ""},
{"parent-url-one-child-with-double-slash", "ssh://[email protected]/foo/modules.git//foo", "ssh://[email protected]/foo/modules.git", "foo"},
{"parent-url-multiple-children-with-double-slash", "ssh://[email protected]/foo/modules.git//foo/bar/baz/blah", "ssh://[email protected]/foo/modules.git", "foo/bar/baz/blah"},
}

for _, testCase := range testCases {
// Save a local copy in scope so all the tests don't run the final item in the loop
testCase := testCase
t.Run(testCase.name, func(t *testing.T) {
t.Parallel()

// The input is parsed with the standard library's url.Parse before being handed to splitSourceUrl.
sourceUrl, err := url.Parse(testCase.sourceUrl)
require.NoError(t, err)

// The test options are created only to obtain a logger to pass to splitSourceUrl.
terragruntOptions, err := options.NewTerragruntOptionsForTest("testing")
require.NoError(t, err)

actualRootRepo, actualModulePath, err := splitSourceUrl(sourceUrl, terragruntOptions.Logger)
require.NoError(t, err)

assert.Equal(t, testCase.expectedRootRepo, actualRootRepo.String())
assert.Equal(t, testCase.expectedModulePath, actualModulePath)
})
}
}

func testDownloadTerraformSourceIfNecessary(t *testing.T, canonicalUrl string, downloadDir string, sourceUpdate bool, expectedFileContents string) {
terraformSource := &TerraformSource{
terraformSource := &tfsource.TerraformSource{
CanonicalSourceURL: parseUrl(t, canonicalUrl),
DownloadDir: downloadDir,
WorkingDir: downloadDir,
Expand Down Expand Up @@ -306,7 +260,7 @@ func testDownloadTerraformSourceIfNecessary(t *testing.T, canonicalUrl string, d
}

func testAlreadyHaveLatestCode(t *testing.T, canonicalUrl string, downloadDir string, expected bool) {
terraformSource := &TerraformSource{
terraformSource := &tfsource.TerraformSource{
CanonicalSourceURL: parseUrl(t, canonicalUrl),
DownloadDir: downloadDir,
WorkingDir: downloadDir,
Expand Down
214 changes: 214 additions & 0 deletions cli/tfsource/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
package tfsource

import (
"fmt"
"io/ioutil"
"net/url"
"regexp"
"strings"

"github.com/hashicorp/go-getter"
urlhelper "github.com/hashicorp/go-getter/helper/url"
"github.com/sirupsen/logrus"

"github.com/gruntwork-io/terragrunt/errors"
"github.com/gruntwork-io/terragrunt/util"
)

// forcedRegexp matches a source string of the form "<prefix>::<rest>", where <prefix> consists solely of ASCII letters
// and digits (e.g. the "git" in "git::..."). Capture group 1 is the prefix and capture group 2 is everything that
// follows the "::". It is compiled once at package scope and used by getForcedGetter.
var forcedRegexp = regexp.MustCompile(`^([A-Za-z0-9]+)::(.+)$`)

// TerraformSource describes a piece of Terraform source code that needs to be downloaded, together with the local
// paths that are used for the download.
type TerraformSource struct {
	// CanonicalSourceURL is the canonical form of the raw source, expressed as a URL.
	CanonicalSourceURL *url.URL

	// DownloadDir is the folder the source should be downloaded into.
	DownloadDir string

	// WorkingDir is the folder inside DownloadDir that Terraform should use as its working directory.
	WorkingDir string

	// VersionFile is the path of the file in DownloadDir that records the version number of the code.
	VersionFile string
}

// String renders the source as a single-line, human-readable description that lists all four fields.
func (src *TerraformSource) String() string {
	const layout = "TerraformSource{CanonicalSourceURL = %v, DownloadDir = %v, WorkingDir = %v, VersionFile = %v}"

	return fmt.Sprintf(layout, src.CanonicalSourceURL, src.DownloadDir, src.WorkingDir, src.VersionFile)
}

// EncodeSourceVersion returns an encoded version identifier for this source. Only the query string of
// CanonicalSourceURL contributes: it is re-encoded and passed through util.EncodeBase64Sha1. For remote URLs (e.g. Git
// URLs) this rests on the assumption that the scheme/host/path (e.g. git::github.com/foo/bar) names the module while
// the query string (e.g. ?ref=v0.0.3) names the version. Local file paths carry no query string, so the same path
// (/foo/bar) always yields the same version. See also encodeSourceName and NewTerraformSource.
func (src TerraformSource) EncodeSourceVersion() string {
	query := src.CanonicalSourceURL.Query()
	return util.EncodeBase64Sha1(query.Encode())
}

// WriteVersionFile stores the version identifier of this source, as computed by EncodeSourceVersion, in the file at
// VersionFile. The file is written with permissions 0640. Any write error is returned wrapped by
// errors.WithStackTrace.
func (src TerraformSource) WriteVersionFile() error {
	contents := []byte(src.EncodeSourceVersion())
	writeErr := ioutil.WriteFile(src.VersionFile, contents, 0640)
	return errors.WithStackTrace(writeErr)
}

// NewTerraformSource builds a TerraformSource from the given source path, working out the folder the source should be
// downloaded to. The aim is to reuse the same download folder for the same source URL across Terragrunt runs;
// otherwise every Terragrunt command would have to wait for the Terraform code to be downloaded again, for that code's
// dependencies to be fetched (terraform get), and for remote state to be configured, which is very slow.
//
// Given a working directory w and a source URL s, the code from S is placed in the folder D/W/H, where:
//
//  1. S is the part of s before the double-slash (//). This typically represents the root of the repo (e.g.
//     github.com/foo/infrastructure-modules). The whole repo is downloaded so that relative paths to other files in
//     that repo resolve correctly. If no double-slash is present, all of s is used.
//  2. D is the downloadDir argument.
//  3. W is util.EncodeBase64Sha1 applied to the canonical form of w. This ensures that when Terragrunt runs
//     concurrently in several folders (e.g. during automated tests), those folders do not overwrite each other even
//     if they use the same source URL s.
//  4. H is the encoded name of S without its query string (see encodeSourceName). For remote source URLs (e.g. Git
//     URLs) this assumes the scheme/host/path (e.g. git::github.com/foo/bar) identifies the repo, so the same repo is
//     always downloaded into the same folder. The version of the module is assumed to live in the query string (e.g.
//     ref=v0.0.3); its encoded form is meant to be stored in the file .terragrunt-source-version within D/W/H (see
//     EncodeSourceVersion and WriteVersionFile).
//
// The returned WorkingDir is D/W/H joined with the part of s after the double-slash.
//
// NOTE(review): the decision of when to actually download lives in downloadTerraformSourceIfNecessary, which is not
// part of this file. It is described as always downloading local file paths, and downloading remote paths only if
// D/W/H does not exist yet or its recorded version differs from the current one — confirm against that function.
func NewTerraformSource(source string, downloadDir string, workingDir string, logger *logrus.Entry) (*TerraformSource, error) {
	absWorkingDir, err := util.CanonicalPath(workingDir, "")
	if err != nil {
		return nil, err
	}

	fullSourceURL, err := toSourceUrl(source, absWorkingDir)
	if err != nil {
		return nil, err
	}

	repoURL, moduleSubPath, err := splitSourceUrl(fullSourceURL, logger)
	if err != nil {
		return nil, err
	}

	// Local source folders are always referred to by their canonical file path rather than a relative one, so that
	// the same local folder maps to the same download folder regardless of how its path was written.
	if IsLocalSource(repoURL) {
		absRepoPath, pathErr := util.CanonicalPath(repoURL.Path, "")
		if pathErr != nil {
			return nil, pathErr
		}
		repoURL.Path = absRepoPath
	}

	repoDirName, err := encodeSourceName(repoURL)
	if err != nil {
		return nil, err
	}

	result := &TerraformSource{CanonicalSourceURL: repoURL}
	result.DownloadDir = util.JoinPath(downloadDir, util.EncodeBase64Sha1(absWorkingDir), repoDirName)
	result.WorkingDir = util.JoinPath(result.DownloadDir, moduleSubPath)
	result.VersionFile = util.JoinPath(result.DownloadDir, ".terragrunt-source-version")

	return result, nil
}

// toSourceUrl turns the given source string into a URL struct. It is intended to cope with every kind of source that
// the terraform init command accepts: local file paths, Git paths, and HTTP URLs.
func toSourceUrl(source string, workingDir string) (*url.URL, error) {
	// Terraform's init command relies on the go-getter library to download source URLs, so the same library is asked
	// to detect what kind of source this is before the result is parsed.
	detectedSource, err := getter.Detect(source, workingDir, getter.Detectors)
	if err != nil {
		return nil, errors.WithStackTrace(err)
	}

	return parseSourceUrl(detectedSource)
}

// parseSourceUrl converts the given source string into a URL struct. Sources carrying one of go-getter's "forced
// getter" prefixes, such as git::, are supported: the prefix is stripped before parsing and afterwards folded back
// into the scheme of the result as "<prefix>::<scheme>".
func parseSourceUrl(source string) (*url.URL, error) {
	getterPrefix, bareSource := getForcedGetter(source)

	// Only the part after the getter prefix is handed to the URL parser.
	parsed, err := urlhelper.Parse(bareSource)
	if err != nil {
		return nil, errors.WithStackTrace(err)
	}

	if getterPrefix == "" {
		return parsed, nil
	}

	// Put the getter prefix back by making it part of the scheme.
	parsed.Scheme = getterPrefix + "::" + parsed.Scheme
	return parsed, nil
}

// IsLocalSource reports whether the given URL points at a path on the local file system, which is the case exactly
// when its scheme is "file".
func IsLocalSource(sourceUrl *url.URL) bool {
	const localScheme = "file"

	return localScheme == sourceUrl.Scheme
}

// splitSourceUrl divides a source URL into the root repo and the module path. The root repo is the part of the URL up
// to the first double-slash (//) in its path, which typically is the root of a modules repo (e.g.
// github.com/foo/infrastructure-modules); the module path is whatever follows that double-slash. When the path holds
// no double-slash, a warning is logged and sourceUrl itself is returned together with an empty module path.
func splitSourceUrl(sourceUrl *url.URL, logger *logrus.Entry) (*url.URL, string, error) {
	const separator = "//"

	splitAt := strings.Index(sourceUrl.Path, separator)
	if splitAt < 0 {
		logger.Warningf("No double-slash (//) found in source URL %s. Relative paths in downloaded Terraform code may not work.", sourceUrl.Path)
		return sourceUrl, "", nil
	}

	// The path is shortened on a freshly parsed URL rather than on sourceUrl itself.
	rootRepoUrl, err := parseSourceUrl(sourceUrl.String())
	if err != nil {
		return nil, "", errors.WithStackTrace(err)
	}

	rootRepoUrl.Path = sourceUrl.Path[:splitAt]
	modulePath := sourceUrl.Path[splitAt+len(separator):]

	return rootRepoUrl, modulePath, nil
}

// encodeSourceName returns the encoded module name for the given source URL: the whole URL with its query string
// removed, passed through util.EncodeBase64Sha1. For remote URLs (e.g. Git URLs) this rests on the assumption that the
// scheme/host/path (e.g. git::github.com/foo/bar) identifies the module name while the query string (e.g.
// ?ref=v0.0.3) identifies the version. Local file paths have no query string, so the same file path (/foo/bar) is
// always treated as the same version. See also EncodeSourceVersion and NewTerraformSource.
func encodeSourceName(sourceUrl *url.URL) (string, error) {
	// The query string is cleared on a re-parsed URL, not on sourceUrl itself.
	withoutQuery, err := parseSourceUrl(sourceUrl.String())
	if err != nil {
		return "", errors.WithStackTrace(err)
	}
	withoutQuery.RawQuery = ""

	encodedName := util.EncodeBase64Sha1(withoutQuery.String())
	return encodedName, nil
}

// getForcedGetter separates an optional "getter" prefix from a Terraform source URL. Such a prefix names the protocol
// that should be used to download the URL; "git::", for example, means Git should be used. The first return value is
// the prefix without the "::" (or the empty string when there is none) and the second is the remainder of the URL (or
// the unchanged input when there is no prefix). The logic is copied from the getForcedGetter method of
// go-getter/get.go, because that method is not exported publicly.
func getForcedGetter(sourceUrl string) (string, string) {
	groups := forcedRegexp.FindStringSubmatch(sourceUrl)

	// A nil result (no match) has length zero, so the length check alone covers the no-match case.
	if len(groups) <= 2 {
		return "", sourceUrl
	}

	return groups[1], groups[2]
}
Loading

0 comments on commit 4e47697

Please sign in to comment.