Skip to content

Commit

Permalink
Setup Nesta Data Science cookiecutter (#2)
Browse files Browse the repository at this point in the history
Co-authored-by: nasrahussein1 <[email protected]>
  • Loading branch information
lizgzil and nasrahussein1 authored Jun 30, 2021
1 parent bcb9992 commit 32d1abc
Show file tree
Hide file tree
Showing 29 changed files with 714 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .env.shared
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# For shared non-secret configuration
# The Makefile pulls this file in with `include .env.shared`, so every line
# has to stay valid Make syntax: keep the `export NAME=value` form and do not
# put comments or trailing spaces after a value.
# Do NOT change these:
export PROJECT_OPENNESS=public
# PROJECT_NAME: conda environment name used by `make conda-create` / `make conda-update`
export PROJECT_NAME=skills-taxonomy-v2
# REPO_NAME: package path handed to sphinx-apidoc by `make docs`
export REPO_NAME=skills_taxonomy_v2
export GITHUB_ACCOUNT=nestauk
# Re-configurable:
# BUCKET: S3 bucket that `make inputs-pull` / `make inputs-push` sync `inputs/` with
export BUCKET=skills-taxonomy-v2
export METAFLOW_PROFILE=ds-cookiecutter
14 changes: 14 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
---

Checklist:

- [ ] I have refactored my code out from `notebooks/`
- [ ] I have checked the code runs
- [ ] I have tested the code
- [ ] I have run `pre-commit` and addressed any issues not automatically fixed
- [ ] I have merged any new changes from `dev`
- [ ] I have documented the code
- [ ] Major functions have docstrings
- [ ] Appropriate information has been added to `README`s
- [ ] I have explained the feature in this PR or (better) in `outputs/reports/`
- [ ] I have requested a code review
123 changes: 123 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
*.lock

# Proposed convention
scratch*

# Nesta specific
research_daps/
*.key

# Report
*.aux
*.bbl
*.blg

# Metaflow
.metaflow
metaflow.s3.*
.run_id

# Notebooks (use jupytext instead!)
*.ipynb

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.pytest_cache

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# configuration
.env
.config
*.config

# Database
*.db
*.rdb

# Pycharm
.idea

# VS Code
.vscode/

# Spyder
.spyproject/

# Jupyter NB Checkpoints
.ipynb_checkpoints/

# exclude /inputs, /outputs/data and /outputs/models except top-level markdown
/inputs/*
!/inputs/*.md
/outputs/data/*
!/outputs/data/*.md
/outputs/models/*
!/outputs/models/*.md

# Mac OS-specific storage files
.DS_Store

# vim
*.swp
*.swo
*~

# Mypy cache
.mypy_cache/

# NPM
node_modules/
61 changes: 61 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Hooks under `repo: local` with `language: system` run the executable named
# in `entry` from the current environment, so those tools have to be installed
# there already (presumably via the project's dev dependencies - confirm).
repos:
  - repo: local
    hooks:
      - id: black
        name: black
        entry: black
        language: system
        types: [python]
        require_serial: true
      - id: check-added-large-files
        name: Check for added large files
        entry: check-added-large-files
        language: system
      - id: check-merge-conflict
        name: Check for files with merge conflict strings
        entry: check-merge-conflict
        language: system
      - id: end-of-file-fixer
        name: Fix End of Files
        entry: end-of-file-fixer
        language: system
        types: [text]
        stages: [commit, push, manual]
      - id: trailing-whitespace
        name: Trim Trailing Whitespace
        entry: trailing-whitespace-fixer
        language: system
        types: [text]
        stages: [commit, push, manual]
      - id: check-toml
        name: Check Toml
        entry: check-toml
        language: system
        types: [toml]
      - id: check-yaml
        name: Check Yaml
        entry: check-yaml
        language: system
        types: [yaml]
      # - id: reorder-python-imports
      #   name: Reorder python imports
      #   entry: reorder-python-imports
      #   language: system
      #   types: [python]
      # - id: flake8
      #   name: flake8
      #   entry: flake8
      #   language: system
      #   types: [python]
      #   args: ["--config=.flake8"]
      #   require_serial: true
      # The name lists every branch protected by the `-b` arguments below.
      - id: no-commit-to-branch
        name: Prevent commits to dev / master / main
        entry: no-commit-to-branch
        language: python
        args: ["-b", dev, "-b", master, "-b", main]
        pass_filenames: false
  - repo: https://github.com/prettier/pre-commit
    rev: v2.1.2
    hooks:
      - id: prettier
11 changes: 11 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

The MIT License (MIT)
=====================
Copyright (c) 2021, nasra

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

175 changes: 175 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# Recipes use bash's `source` (see execute_in_env), so pin the recipe shell.
SHELL := /bin/bash

# Detect how to open things depending on our OS.
# `:=` runs `uname` once at parse time instead of on every expansion of OS.
OS := $(shell uname -s)
ifeq ($(OS),Linux)
OPEN=xdg-open
else
OPEN=open
endif

# AWS CLI profile passed to `aws s3 sync`. It is assigned before the includes
# below, so `.env` (or the command line: `make inputs-pull PROFILE=...`) can
# override it.
PROFILE = default
# Import env variables.
# `.env.shared` is versioned and therefore required. `.env` is git-ignored and
# may be absent on a fresh clone, so it is included with a leading `-` to keep
# targets such as `make help` and `make install` usable without it.
include .env.shared
-include .env

# Wrap a shell command so that it runs from within our project's conda env.
# Usage: $(call execute_in_env, <command>)
# The expansion sources bin/conda_activate.sh, invokes `conda_activate`
# (presumably a shell function defined by that script - confirm) and only then
# runs the command; the `&&` chain stops at the first step that fails.
# TODO: add over-ride based on some environment variable?
#       e.g. `MAKE_NO_ENV` in `.env` set makes this do nothing
define execute_in_env
source bin/conda_activate.sh && conda_activate && $(1)
endef

# One-shot bootstrap: runs the `install`, `setup-github` and `setup-bucket`
# targets as prerequisites, then prints a confirmation.
.PHONY: init
## Fully initialise a project: install; setup github repo; setup S3 bucket
init: install setup-github setup-bucket
	@echo "INIT COMPLETE"

# Local setup only: runs `conda-create`, `setup-git` and `setup-metaflow` as
# prerequisites, then prints a confirmation.
.PHONY: install
## Install a project: create conda env; install local package; setup git hooks; setup metaflow+AWS
install: conda-create setup-git setup-metaflow
	@echo "INSTALL COMPLETE"

# Syncs s3://$(BUCKET)/inputs down into the local inputs/ directory, using the
# AWS CLI profile named by $(PROFILE), from inside the project's conda env.
.PHONY: inputs-pull
## Pull `inputs/` from S3
inputs-pull:
	$(call execute_in_env, aws s3 sync s3://$(BUCKET)/inputs inputs --profile $(PROFILE))

# Syncs the local inputs/ directory up to s3://$(BUCKET)/inputs, using the
# AWS CLI profile named by $(PROFILE), from inside the project's conda env.
.PHONY: inputs-push
## Push `inputs/` to S3 (WARNING: this may overwrite existing files!)
inputs-push:
	$(call execute_in_env, aws s3 sync inputs s3://$(BUCKET)/inputs --profile $(PROFILE))

# Step 1 generates API stub sources for the $(REPO_NAME) package into docs/api.
# Step 2 renders the Sphinx project rooted at docs/ into docs/_build.
# `-b` takes a builder name; `html` is the builder whose output
# (docs/_build/index.html) the `docs-open` target opens.
.PHONY: docs
## Build the API documentation
docs:
	$(call execute_in_env, sphinx-apidoc -o docs/api ${REPO_NAME})
	$(call execute_in_env, sphinx-build -b html docs/ docs/_build)

# Removes what the `docs` target generates: the sphinx-apidoc stubs in
# docs/api and the rendered site in docs/_build.
# `-f` keeps the target from failing when there is nothing to remove yet.
.PHONY: docs-clean
## Clean the built API documentation
docs-clean:
	rm -rf docs/api
	rm -rf docs/_build

# Hands the built index page to the platform opener chosen at the top of this
# Makefile (xdg-open on Linux, open elsewhere).
.PHONY: docs-open
## Open the docs in the browser
docs-open:
	${OPEN} docs/_build/index.html

# Creates the conda env named $(PROJECT_NAME) from environment.yaml, then
# re-invokes make (silently) to run the `pip-install` target.
.PHONY: conda-create
## Create a conda environment
conda-create:
	conda env create --quiet --name $(PROJECT_NAME) --file environment.yaml
	$(MAKE) --silent pip-install

# Applies environment.yaml to the existing conda env named $(PROJECT_NAME),
# then re-invokes make to run the `pip-install` target.
.PHONY: conda-update
## Update the conda-environment based on changes to `environment.yaml`
conda-update:
	conda env update --name $(PROJECT_NAME) --file environment.yaml
	$(MAKE) pip-install

# Two passes from the repository root: first delete *.pyc / *.pyo files, then
# delete the __pycache__ directories.
.PHONY: clean
## Delete all compiled Python files
clean:
	find . -name '*.py[co]' -type f -delete
	find . -name '__pycache__' -type d -delete

# Invokes the `pre-commit` executable, without a subcommand, from inside the
# project's conda env.
.PHONY: pre-commit
## Perform pre-commit actions
pre-commit:
	$(call execute_in_env, pre-commit)

# Invokes `flake8` with no arguments from inside the project's conda env.
.PHONY: lint
## Run flake8 linting on repository
lint:
	$(call execute_in_env, flake8)

# Installs the current directory as an editable package together with its
# `dev` extra, from inside the project's conda env; pip output is suppressed.
.PHONY: pip-install
## Install our package and requirements in editable mode (including development dependencies)
pip-install:
	$(call execute_in_env, pip install --quiet --editable ".[dev]")

#################################################################################
# Helper Commands (no need to explicitly document) #
#################################################################################

# Registers the pre-commit git hooks and installs the hook environments up
# front (`--install-hooks`), from inside the project's conda env.
.PHONY: setup-git
setup-git:
	$(call execute_in_env, pre-commit install --install-hooks)

# Runs bin/install_metaflow_aws.sh with the Makefile's shell ($(SHELL)) from
# inside the project's conda env.
.PHONY: setup-metaflow
setup-metaflow:
	$(call execute_in_env, $(SHELL) ./bin/install_metaflow_aws.sh)

# Prints a short "S3" progress banner (mirroring the "GH" banner printed by
# setup-github), then runs bin/create_bucket.sh with the Makefile's shell from
# inside the project's conda env.
.PHONY: setup-bucket
setup-bucket:
	@echo S3
	$(call execute_in_env, ${SHELL} ./bin/create_bucket.sh)

# Prints a short "GH" progress banner, then runs bin/create_repo.sh with the
# Makefile's shell ($(SHELL)) from inside the project's conda env.
.PHONY: setup-github
setup-github:
	@echo "GH"
	$(call execute_in_env, $(SHELL) ./bin/create_repo.sh)


#################################################################################
# Self Documenting Commands #
#################################################################################

# Running `make` with no target prints the help text produced below.
.DEFAULT_GOAL := help

# Self-documenting help. A line that starts with two hashes and a space, in any
# file listed in MAKEFILE_LIST, is treated as the description of the rule on
# the line that follows it. Ordinary comments must therefore use a single `#`.
#
# Pipeline stages:
# 1. sed emits one `target---description` line per documented rule
# 2. sort orders those lines case-insensitively (LC_ALL=C)
# 3. awk splits on `---`, prints the target in colour (tput setaf 6) padded to
#    `indent` columns, and word-wraps the description to the terminal width
#    reported by `tput cols`
# 4. more pages the result; on Darwin it is additionally passed
#    `--no-init --raw-control-chars`
#
# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
# sed script explained:
# /^##/:
# * save line in hold space
# * purge line
# * Loop:
# * append newline + line to hold space
# * go to next line
# * if line starts with doc comment, strip comment character off and loop
# * remove target prerequisites
# * append hold space (+ newline) to line
# * replace newline plus comments by `---`
# * print line
# Separate expressions are necessary because labels cannot be delimited by
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
# `$$` in the recipe passes a literal `$` through to the shell / awk.
.PHONY: help
help:
@echo "$$(tput bold)Available rules:$$(tput sgr0)"
@echo
@sed -n -e "/^## / { \
h; \
s/.*//; \
:doc" \
-e "H; \
n; \
s/^## //; \
t doc" \
-e "s/:.*//; \
G; \
s/\\n## /---/; \
s/\\n/ /g; \
p; \
}" ${MAKEFILE_LIST} \
| LC_ALL='C' sort --ignore-case \
| awk -F '---' \
-v ncol=$$(tput cols) \
-v indent=19 \
-v col_on="$$(tput setaf 6)" \
-v col_off="$$(tput sgr0)" \
'{ \
printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
n = split($$2, words, " "); \
line_length = ncol - indent; \
for (i = 1; i <= n; i++) { \
line_length -= length(words[i]) + 1; \
if (line_length <= 0) { \
line_length = ncol - indent - length(words[i]) - 1; \
printf "\n%*s ", -indent, " "; \
} \
printf "%s ", words[i]; \
} \
printf "\n"; \
}' \
| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
Loading

0 comments on commit 32d1abc

Please sign in to comment.