Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin' into systemd-killmode-patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
insanejudge committed May 17, 2018
2 parents 02aeb21 + 504a910 commit 9d9cd73
Show file tree
Hide file tree
Showing 2,269 changed files with 695,371 additions and 17,587 deletions.
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Do not add existing client/drivers/test-resources/qemu/* files to LFS.
# Adding existing files to LFS requires rewriting history back to the point
# those files were added.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ _testmain.go
*.test
*.prof

tags
bin/
/pkg/
.vagrant/
Expand Down
11 changes: 6 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ services:
language: go

go:
- 1.9.x
- "1.10.x"

addons:
chrome: stable

git:
depth: 300
Expand All @@ -28,13 +31,11 @@ matrix:
- os: osx
fast_finish: true

cache:
directories:
- ui/node_modules

before_install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]] && [[ -z "$SKIP_NOMAD_TESTS" ]]; then sudo -E bash ./scripts/travis-mac-priv.sh ; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]] && [[ -z "$SKIP_NOMAD_TESTS" ]]; then sudo -E bash ./scripts/travis-linux.sh ; fi
- if [[ "$RUN_UI_TESTS" ]]; then curl -o- -L https://yarnpkg.com/install.sh | bash -s -- --version 1.0.1 ; fi
- if [[ "$RUN_UI_TESTS" ]]; then export PATH="$HOME/.yarn/bin:$PATH" ; fi

install:
- if [[ -z "$SKIP_NOMAD_TESTS" ]]; then make deps ; fi
Expand Down
208 changes: 182 additions & 26 deletions CHANGELOG.md

Large diffs are not rendered by default.

22 changes: 12 additions & 10 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ deps: ## Install build and development dependencies
@echo "==> Updating build dependencies..."
go get -u github.com/kardianos/govendor
go get -u github.com/ugorji/go/codec/codecgen
go get -u github.com/jteeuwen/go-bindata/...
go get -u github.com/hashicorp/go-bindata/...
go get -u github.com/elazarl/go-bindata-assetfs/...
go get -u github.com/a8m/tree/cmd/tree
go get -u github.com/magiconair/vendorfmt/cmd/vendorfmt
Expand Down Expand Up @@ -215,18 +215,18 @@ dev: vendorfmt changelogfmt ## Build for the current development platform
@rm -f $(GOPATH)/bin/nomad
@$(MAKE) --no-print-directory \
$(DEV_TARGET) \
GO_TAGS="nomad_test $(NOMAD_UI_TAG)"
GO_TAGS="$(NOMAD_UI_TAG)"
@mkdir -p $(PROJECT_ROOT)/bin
@mkdir -p $(GOPATH)/bin
@cp $(PROJECT_ROOT)/$(DEV_TARGET) $(PROJECT_ROOT)/bin/
@cp $(PROJECT_ROOT)/$(DEV_TARGET) $(GOPATH)/bin

.PHONY: prerelease
prerelease: GO_TAGS=ui
prerelease: GO_TAGS=ui release
prerelease: check generate ember-dist static-assets ## Generate all the static assets for a Nomad release

.PHONY: release
release: GO_TAGS=ui
release: GO_TAGS=ui release
release: clean $(foreach t,$(ALL_TARGETS),pkg/$(t).zip) ## Build all release packages which can be built on this platform.
@echo "==> Results:"
@tree --dirsfirst $(PROJECT_ROOT)/pkg
Expand All @@ -243,11 +243,10 @@ test: ## Run the Nomad test suite and/or the Nomad UI test suite
.PHONY: test-nomad
test-nomad: dev ## Run Nomad test suites
@echo "==> Running Nomad test suites:"
@NOMAD_TEST_RKT=1 \
go test $(if $(VERBOSE),-v) \
@go test $(if $(VERBOSE),-v) \
-cover \
-timeout=900s \
-tags="nomad_test $(if $(HAS_LXC),lxc)" ./... $(if $(VERBOSE), >test.log ; echo $$? > exit-code)
-tags="$(if $(HAS_LXC),lxc)" ./... $(if $(VERBOSE), >test.log ; echo $$? > exit-code)
@if [ $(VERBOSE) ] ; then \
bash -C "$(PROJECT_ROOT)/scripts/test_check.sh" ; \
fi
Expand All @@ -262,6 +261,9 @@ clean: ## Remove build artifacts

.PHONY: travis
travis: ## Run Nomad test suites with output to prevent timeouts under Travis CI
@if [ ! $(SKIP_NOMAD_TESTS) ]; then \
make generate; \
fi
@sh -C "$(PROJECT_ROOT)/scripts/travis.sh"

.PHONY: testcluster
Expand All @@ -277,22 +279,22 @@ testcluster: ## Bring up a Linux test cluster using Vagrant. Set PROVIDER if nec
.PHONY: static-assets
static-assets: ## Compile the static routes to serve alongside the API
@echo "--> Generating static assets"
@go-bindata-assetfs -pkg agent -prefix ui -modtime 1480000000 -tags ui ./ui/dist/...
@go-bindata-assetfs -pkg agent -prefix ui -modtime 1480000000 -tags ui -o bindata_assetfs.go ./ui/dist/...
@mv bindata_assetfs.go command/agent

.PHONY: test-ui
test-ui: ## Run Nomad UI test suite
@echo "--> Installing JavaScript assets"
@cd ui && npm rebuild node-sass
@cd ui && yarn install
@cd ui && npm install phantomjs-prebuilt
@echo "--> Running ember tests"
@cd ui && phantomjs --version
@cd ui && npm test

.PHONY: ember-dist
ember-dist: ## Build the static UI assets from source
@echo "--> Installing JavaScript assets"
@cd ui && yarn install
@cd ui && yarn install --silent
@cd ui && npm rebuild node-sass
@echo "--> Building Ember application"
@cd ui && npm run build
Expand Down
44 changes: 30 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,30 @@ The key features of Nomad are:
installed, scale up and down based on the number of instances requested, and
automatically recover from failures.

* **Operationally Simple**: Nomad runs as a single binary that can be
either a client or server, and is completely self contained. Nomad does
not require any external services for storage or coordination. This means
Nomad combines the features of a resource manager and scheduler in a single
system.

* **Multi-Datacenter and Multi-Region Aware**: Nomad is designed to be
a global-scale scheduler. Multiple datacenters can be managed as part
of a larger region, and jobs can be scheduled across datacenters if
requested. Multiple regions join together and federate jobs making it
easy to run jobs anywhere.

* **Operationally Simple**: Nomad runs as a single binary that can be
either a client or server, and is completely self contained. Nomad does
not require any external services for storage or coordination. This means
Nomad combines the features of a resource manager and scheduler in a single
system.
* **Flexible Workloads**: Nomad has extensible support for task drivers, allowing it to run
containerized, virtualized, and standalone applications. Users can easily start Docker
containers, VMs, or application runtimes like Java. Nomad supports Linux, Windows, BSD, and OSX,
providing the flexibility to run any workload.

* **Distributed and Highly-Available**: Nomad servers cluster together and
perform leader election and state replication to provide high availability
in the face of failure. The Nomad scheduling engine is optimized for
optimistic concurrency allowing all servers to make scheduling decisions to
maximize throughput.
* **Built for Scale**: Nomad was designed from the ground up to support global scale
infrastructure. Nomad is distributed and highly available, using both
leader election and state replication to provide availability in the face
of failures. Nomad is optimistically concurrent, enabling all servers to participate
in scheduling decisions which increases the total throughput and reduces latency
to support demanding workloads. Nomad has been proven to scale to cluster sizes that
exceed 10k nodes in real-world production environments.

* **HashiCorp Ecosystem**: Nomad integrates with the
entire HashiCorp ecosystem of tools. Like all HashiCorp tools, Nomad follows
Expand All @@ -58,7 +65,7 @@ Developing Nomad

If you wish to work on Nomad itself or any of its built-in systems,
you will first need [Go](https://www.golang.org) installed on your
machine (version 1.9+ is *required*).
machine (version 1.10+ is *required*).

**Developing with Vagrant**
There is an included Vagrantfile that can help bootstrap the process. The
Expand Down Expand Up @@ -102,9 +109,18 @@ Nomad binary in the `bin` and `$GOPATH/bin` folders:

```sh
$ make dev
...
$ bin/nomad
...
```

Optionally run Consul to enable service discovery and health checks:

```sh
$ sudo consul agent -dev
```

And finally start the nomad agent:

```sh
$ sudo bin/nomad agent -dev
```

If the Nomad UI is desired in the development version, run `make dev-ui`. This will build the UI from source and compile it into the dev binary.
Expand Down
4 changes: 3 additions & 1 deletion Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ end

def configureProviders(vmCfg, cpus: "2", memory: "2048")
vmCfg.vm.provider "virtualbox" do |v|
v.customize ["modifyvm", :id, "--cableconnected1", "on"]
v.memory = memory
v.cpus = cpus
end
Expand All @@ -142,6 +143,7 @@ def configureProviders(vmCfg, cpus: "2", memory: "2048")
end

vmCfg.vm.provider "virtualbox" do |v|
v.customize ["modifyvm", :id, "--cableconnected1", "on"]
v.memory = memory
v.cpus = cpus
end
Expand All @@ -154,7 +156,7 @@ def suggestedCPUCores()
when /darwin/
Integer(`sysctl -n hw.ncpu`) / 2
when /linux/
Integer(`cat /proc/cpuinfo | grep processor | wc -l`) / 2
Integer(`grep -c ^processor /proc/cpuinfo`) / 2
else
2
end
Expand Down
4 changes: 2 additions & 2 deletions api/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ func (a *Agent) Health() (*AgentHealthResponse, error) {
}

// Return custom error when response is not expected JSON format
return nil, fmt.Errorf("unable to unmarhsal response with status %d: %v", resp.StatusCode, err)
return nil, fmt.Errorf("unable to unmarshal response with status %d: %v", resp.StatusCode, err)
}

// joinResponse is used to decode the response we get while
Expand Down Expand Up @@ -291,7 +291,7 @@ func (a AgentMembersNameSort) Less(i, j int) bool {

}

// AgentHealthResponse is the response from the Health endpoint desecribing an
// AgentHealthResponse is the response from the Health endpoint describing an
// agent's health.
type AgentHealthResponse struct {
Client *AgentHealth `json:"client,omitempty"`
Expand Down
6 changes: 3 additions & 3 deletions api/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TestAgent_Join(t *testing.T) {
})
defer s2.Stop()

// Attempting to join a non-existent host returns error
// Attempting to join a nonexistent host returns error
n, err := a1.Join("nope")
if err == nil {
t.Fatalf("expected error, got nothing")
Expand Down Expand Up @@ -123,7 +123,7 @@ func TestAgent_ForceLeave(t *testing.T) {
defer s.Stop()
a := c.Agent()

// Force-leave on a non-existent node does not error
// Force-leave on a nonexistent node does not error
if err := a.ForceLeave("nope"); err != nil {
t.Fatalf("err: %s", err)
}
Expand Down Expand Up @@ -241,7 +241,7 @@ func TestAgents_Sort(t *testing.T) {
for _, tt := range sortTests {
sort.Sort(AgentMembersNameSort(tt.in))
if !reflect.DeepEqual(tt.in, tt.out) {
t.Errorf("\necpected: %s\nget : %s", tt.in, tt.out)
t.Errorf("\nexpected: %s\nget : %s", tt.in, tt.out)
}
}
}
Expand Down
80 changes: 74 additions & 6 deletions api/allocations.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,9 @@ func (a *Allocations) Info(allocID string, q *QueryOptions) (*Allocation, *Query
}

// Stats requests /v1/client/allocation/<alloc.ID>/stats and returns the
// decoded AllocResourceUsage together with any error from the query.
//
// NOTE(review): this hunk appears to show both sides of a diff — an older
// path that first resolves a node client via GetNodeClient, and a newer path
// that queries through a.client directly with the caller's QueryOptions. err
// is declared twice as a result; confirm against the commit which lines
// survive in the real file.
func (a *Allocations) Stats(alloc *Allocation, q *QueryOptions) (*AllocResourceUsage, error) {
// Looks like the pre-change path: look up a client for the allocation's node.
nodeClient, err := a.client.GetNodeClient(alloc.NodeID, q)
if err != nil {
return nil, err
}

var resp AllocResourceUsage
// Pre-change query: issued on the node client, with nil query options.
_, err = nodeClient.query("/v1/client/allocation/"+alloc.ID+"/stats", &resp, nil)
// Post-change query: same endpoint, issued on a.client and forwarding q.
path := fmt.Sprintf("/v1/client/allocation/%s/stats", alloc.ID)
_, err := a.client.query(path, &resp, q)
// resp is returned by address even when err is non-nil.
return &resp, err
}

Expand Down Expand Up @@ -85,12 +81,16 @@ type Allocation struct {
Metrics *AllocationMetric
DesiredStatus string
DesiredDescription string
DesiredTransition DesiredTransition
ClientStatus string
ClientDescription string
TaskStates map[string]*TaskState
DeploymentID string
DeploymentStatus *AllocDeploymentStatus
FollowupEvalID string
PreviousAllocation string
NextAllocation string
RescheduleTracker *RescheduleTracker
CreateIndex uint64
ModifyIndex uint64
AllocModifyIndex uint64
Expand Down Expand Up @@ -130,6 +130,8 @@ type AllocationListStub struct {
ClientDescription string
TaskStates map[string]*TaskState
DeploymentStatus *AllocDeploymentStatus
FollowupEvalID string
RescheduleTracker *RescheduleTracker
CreateIndex uint64
ModifyIndex uint64
CreateTime int64
Expand All @@ -141,6 +143,8 @@ type AllocationListStub struct {
// healthy.
type AllocDeploymentStatus struct {
Healthy *bool
Timestamp time.Time
Canary bool
ModifyIndex uint64
}

Expand All @@ -158,3 +162,67 @@ func (a AllocIndexSort) Less(i, j int) bool {
func (a AllocIndexSort) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}

// RescheduleInfo reports how many reschedule attempts the allocation has
// already used and how many its task group's reschedule policy allows.
//
// The first result counts the events in a.RescheduleTracker whose
// RescheduleTime is less than the policy's Interval before t. The second
// result is the policy's Attempts value. Events are only counted when both
// Attempts and Interval are positive.
//
// (0, 0) is returned when the allocation carries no job, when no task group
// named a.TaskGroup has a reschedule policy, or when the policy has no
// Attempts value. A policy without an Interval yields (0, Attempts) because
// there is no window in which to count events.
func (a Allocation) RescheduleInfo(t time.Time) (int, int) {
	// Allocations are not guaranteed to carry their job; without it there
	// is no policy to consult.
	if a.Job == nil {
		return 0, 0
	}

	var reschedulePolicy *ReschedulePolicy
	for _, tg := range a.Job.TaskGroups {
		// Skip entries that cannot be matched by name rather than
		// dereferencing a nil pointer.
		if tg == nil || tg.Name == nil {
			continue
		}
		if *tg.Name == a.TaskGroup {
			reschedulePolicy = tg.ReschedulePolicy
		}
	}
	if reschedulePolicy == nil || reschedulePolicy.Attempts == nil {
		return 0, 0
	}

	availableAttempts := *reschedulePolicy.Attempts
	if reschedulePolicy.Interval == nil {
		return 0, availableAttempts
	}
	interval := *reschedulePolicy.Interval

	attempted := 0

	// Count the tracked reschedule events that fall within the reschedule
	// policy's interval, measured back from t.
	if a.RescheduleTracker != nil && availableAttempts > 0 && interval > 0 {
		now := t.UnixNano()
		window := interval.Nanoseconds()
		for _, event := range a.RescheduleTracker.Events {
			if event == nil {
				continue
			}
			if now-event.RescheduleTime < window {
				attempted++
			}
		}
	}
	return attempted, availableAttempts
}

// RescheduleTracker holds the history of reschedule attempts recorded for an
// allocation.
type RescheduleTracker struct {
	// Events lists the recorded reschedule attempts.
	Events []*RescheduleEvent
}

// RescheduleEvent records a single earlier attempt to reschedule an
// allocation.
type RescheduleEvent struct {
	// RescheduleTime is when the attempt happened. RescheduleInfo compares
	// it against time.Time.UnixNano values, i.e. Unix nanoseconds.
	RescheduleTime int64

	// PrevAllocID identifies the previous allocation that was being
	// restarted by this attempt.
	PrevAllocID string

	// PrevNodeID identifies the node the previous allocation was on.
	PrevNodeID string
}

// DesiredTransition marks the state transitions that are wanted for an
// allocation, so that the scheduler can take them into account when making
// its decision.
type DesiredTransition struct {
	// Migrate, when set to true, indicates the allocation should be
	// stopped and migrated to another node.
	Migrate *bool

	// Reschedule, when set to true, indicates the allocation is eligible
	// to be rescheduled.
	Reschedule *bool
}

// ShouldMigrate reports whether the transition asks for a migration: true
// only when Migrate is set and points at true.
func (d DesiredTransition) ShouldMigrate() bool {
	if d.Migrate == nil {
		return false
	}
	return *d.Migrate
}
Loading

0 comments on commit 9d9cd73

Please sign in to comment.