Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make dependencies more compact #342

Merged
merged 4 commits into from
Mar 20, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- Refactor: factor out jinja interactions ([#309](https://github.com/fishtown-analytics/dbt/pull/309))
- Speedup: detect cycles at the end of compilation ([#307](https://github.com/fishtown-analytics/dbt/pull/307))
- Speedup: write graph file with gpickle instead of yaml ([#306](https://github.com/fishtown-analytics/dbt/pull/306))
- Clone dependencies with `--depth 1` to make them more compact ([#277](https://github.com/fishtown-analytics/dbt/issues/277), [#342](https://github.com/fishtown-analytics/dbt/pull/342))

## dbt 0.7.1 (February 28, 2017)

Expand Down
41 changes: 41 additions & 0 deletions dbt/clients/git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import subprocess

from dbt.logger import GLOBAL_LOGGER as logger


def log_cmd(cmd):
    """Write a debug-level log line showing the command about to be run.

    `cmd` is an argv-style sequence of strings; the pieces are joined with
    single spaces purely for display.
    """
    rendered = ' '.join(cmd)
    logger.debug('Executing "{}"'.format(rendered))


def run_cmd(cwd, cmd):
    """Run `cmd` with `cwd` as the working directory and capture its output.

    The command is logged via `log_cmd` before it is launched. Both output
    streams are piped, and the `(stdout, stderr)` pair produced by
    `Popen.communicate()` is returned. The process exit status is not
    inspected here, so callers must look at the captured output themselves.
    """
    log_cmd(cmd)

    pipe = subprocess.PIPE
    process = subprocess.Popen(cmd, cwd=cwd, stdout=pipe, stderr=pipe)

    captured = process.communicate()
    return captured


def clone(repo, cwd):
    """Shallow-clone `repo` (history depth 1) from inside directory `cwd`.

    Returns whatever `run_cmd` returns for the `git clone` invocation.
    """
    clone_cmd = ['git', 'clone', '--depth', '1', repo]
    return run_cmd(cwd, clone_cmd)


def checkout(cwd, branch=None):
    """Move the repository at `cwd` to the tip of `origin/<branch>`.

    When `branch` is None, 'master' is used. Three git commands are issued
    in order: restrict the tracked remote branches to the target, fetch it
    with depth 1, then hard-reset the working tree to the fetched remote
    ref. The output of those commands is discarded and nothing is returned.
    """
    target = 'master' if branch is None else branch

    logger.info(' Checking out branch {}.'.format(target))

    # Order matters: the branch must be tracked and fetched before the
    # reset can resolve the remote ref.
    steps = (
        ['git', 'remote', 'set-branches', 'origin', target],
        ['git', 'fetch', '--depth', '1', 'origin', target],
        ['git', 'reset', '--hard', 'origin/{}'.format(target)],
    )
    for step in steps:
        run_cmd(cwd, step)


def get_current_sha(cwd):
    """Return the commit SHA that HEAD resolves to in the repository at `cwd`.

    Runs `git rev-parse HEAD` through `run_cmd` and decodes its stdout as
    UTF-8. Surrounding whitespace is stripped so the result is the bare
    SHA. The command's stderr is ignored; if git printed nothing to stdout
    the result is an empty string.
    """
    out, _err = run_cmd(cwd, ['git', 'rev-parse', 'HEAD'])

    # git terminates the SHA with a newline; drop it so callers that log,
    # slice or compare the value never carry stray whitespace.
    return out.decode('utf-8').strip()
69 changes: 25 additions & 44 deletions dbt/task/deps.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import os
import errno
import re
import yaml
import pprint
import subprocess

import dbt.clients.git
import dbt.project as project

from dbt.logger import GLOBAL_LOGGER as logger
Expand All @@ -20,57 +19,39 @@ def __init__(self, args, project):
self.args = args
self.project = project

def __checkout_branch(self, branch, full_path):
logger.info(" checking out branch {}".format(branch))
proc = subprocess.Popen(
['git', 'checkout', branch],
cwd=full_path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = proc.communicate()

def __pull_repo(self, repo, branch=None):
proc = subprocess.Popen(
['git', 'clone', repo],
cwd=self.project['modules-path'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
modules_path = self.project['modules-path']

out, err = proc.communicate()
out, err = dbt.clients.git.clone(repo, modules_path)

exists = re.match(
"fatal: destination path '(.+)' already exists",
err.decode('utf-8')
)
exists = re.match("fatal: destination path '(.+)' already exists",
err.decode('utf-8'))

folder = None
start_sha = None

if exists:
folder = exists.group(1)
logger.info("updating existing dependency {}".format(folder))
full_path = os.path.join(self.project['modules-path'], folder)
proc = subprocess.Popen(
['git', 'fetch', '--all'],
cwd=full_path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = proc.communicate()
remote_branch = 'origin/master' if branch is None \
else 'origin/{}'.format(branch)
proc = subprocess.Popen(
['git', 'reset', '--hard', remote_branch],
cwd=full_path,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = proc.communicate()
if branch is not None:
self.__checkout_branch(branch, full_path)
logger.info('Updating existing dependency {}.'.format(folder))
else:
matches = re.match("Cloning into '(.+)'", err.decode('utf-8'))
folder = matches.group(1)
full_path = os.path.join(self.project['modules-path'], folder)
logger.info("pulled new dependency {}".format(folder))
if branch is not None:
self.__checkout_branch(branch, full_path)
logger.info('Pulling new dependency {}.'.format(folder))

dependency_path = os.path.join(modules_path, folder)
start_sha = dbt.clients.git.get_current_sha(dependency_path)
dbt.clients.git.checkout(dependency_path, branch)
end_sha = dbt.clients.git.get_current_sha(dependency_path)

if exists:
if start_sha == end_sha:
logger.info(' Already at {}, nothing to do.'.format(
start_sha[:6]))
else:
logger.info(' Updated checkout from {} to {}.'.format(
start_sha[:6], end_sha[:6]))
else:
logger.info(' Checked out at {}.'.format(end_sha[:6]))

return folder

Expand Down