Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DeviceManager interface and CPUDeviceManager #2249

Merged
merged 1 commit into from
Jan 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions include/glow/Backends/DeviceManager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GLOW_BACKENDS_DEVICEMANAGER_H
#define GLOW_BACKENDS_DEVICEMANAGER_H

#include "glow/Backends/Backend.h"
#include "glow/Backends/CompiledFunction.h"
#include "glow/Graph/Context.h"
#include "glow/Graph/Graph.h"
#include "glow/Runtime/RuntimeTypes.h"

#include <functional>
#include <map>
#include <string>

namespace glow {

/// Callback signalling success/failure of loading a Module onto a device.
/// It receives the Module that was passed to addNetwork and a ResultCode
/// describing the outcome.
using ReadyCBTy = std::function<void(const Module *, runtime::ResultCode)>;
nickgg marked this conversation as resolved.
Show resolved Hide resolved
/// Callback signalling the result of running a function.
using ResultCBTy = std::function<void(
nickgg marked this conversation as resolved.
Show resolved Hide resolved
runtime::RunIdentifierTy, runtime::ResultCode, std::unique_ptr<Context>)>;
/// Map of Function name -> CompiledFunction, used when loading a network onto a
/// device.
/// NOTE(review): the map stores raw CompiledFunction pointers, so the type
/// itself does not express ownership; presumably the CompiledFunctions must
/// outlive their registration with the device - confirm against callers.
using FunctionMapTy = std::map<std::string, CompiledFunction *>;

/// Interface managing a specific instance of a device.
class DeviceManager {
nickgg marked this conversation as resolved.
Show resolved Hide resolved
protected:
/// Type of Backend for this Device.
BackendKind backend_;

public:
DeviceManager(BackendKind backend) : backend_(backend) {}
virtual ~DeviceManager() {}

/// Initialize the device.
virtual void init() {}

/// Load the provided module into the device, readyCB will be called when
/// ready to use.
/// \p functions contains the list of functions to load, keyed by their name
/// (as used in runFunction).
virtual void addNetwork(const Module *module, FunctionMapTy functions,
ReadyCBTy readyCB) = 0;

/// Remove (and delete) the provided network and all its functions, freeing
/// up space on the device.
virtual void evictNetwork(const Module *module) = 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do we know if eviction succeeded? (e.g. stats/logging etc)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could generate the event/log from within the device manager.


/// Execute the named Function in an already provided network on the device.
/// functionName must match the name of a function already added.
/// Context should have all Placeholders allocated. resultCB will be called
/// with the Context results filled.
virtual runtime::RunIdentifierTy runFunction(std::string functionName,
std::unique_ptr<Context> ctx,
ResultCBTy resultCB) = 0;

/// Stops execution and shuts down the Device.
virtual void stop(bool block = true) {}

/// \returns the type of Backend that powers this Device.
BackendKind getBackendKind() { return backend_; }

/// \returns the maximum memory (in bytes) available on the device.
virtual uint64_t getMaximumMemory() = 0;

/// \returns the memory (in bytes) currently available on the device for
/// provisioning new networks.
virtual uint64_t getAvailableMemory() = 0;

/// \returns true if a Module with the estimated constant size \p estimate is
/// expected to fit on the device.
virtual bool isMemoryAvailable(uint64_t estimate) = 0;
};

} // namespace glow

#endif // GLOW_BACKENDS_DEVICEMANAGER_H
78 changes: 78 additions & 0 deletions include/glow/Backends/QueueBackedDeviceManager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GLOW_BACKENDS_QUEUEBACKEDDEVICEMANAGER_H
#define GLOW_BACKENDS_QUEUEBACKEDDEVICEMANAGER_H

#include "glow/Backends/DeviceManager.h"
#include "glow/Support/ThreadPool.h"

#include <atomic>

namespace glow {

class QueueBackedDeviceManager : public DeviceManager {
protected:
/// Thread which interfaces with the device.
ThreadPool workThread_;

/// Identifier for next run.
std::atomic<runtime::RunIdentifierTy> nextIdentifier_{1};

public:
QueueBackedDeviceManager(BackendKind backend);
virtual ~QueueBackedDeviceManager();

/// Initialize the device.
void init() override;

/// Load the provided module into the device, readyCB will be called when
/// ready to use
void addNetwork(const Module *module, FunctionMapTy functions,
ReadyCBTy readyCB) override;

/// Remove (and delete) the provided network and all it's functions, freeing
/// up space on the device.
void evictNetwork(const Module *module) override;

/// Execute the named Function in an already provided network on the device.
/// functionName must match the name of a function already added.
/// Context should have all Placeholders allocated. resultCB will be called
/// with the Context results filled.
runtime::RunIdentifierTy runFunction(std::string functionName,
std::unique_ptr<Context> ctx,
ResultCBTy resultCB) override;

/// Stops execution and shuts down the Device.
void stop(bool block = true) override;

protected:
/// Operator handling methods to be implemented in subclasses (i.e. per Device
/// type)

/// Load and compile the Module
virtual void addNetworkImpl(const Module *, FunctionMapTy, ReadyCBTy) = 0;

/// Remove the module and reclaim it's memory
virtual void evictNetworkImpl(const Module *) = 0;

/// Execute provided Function.
virtual void runFunctionImpl(runtime::RunIdentifierTy, std::string,
std::unique_ptr<Context>, ResultCBTy) = 0;
};

} // namespace glow

#endif // GLOW_BACKENDS_QUEUEBACKEDDEVICEMANAGER_H
67 changes: 67 additions & 0 deletions include/glow/Runtime/RuntimeTypes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GLOW_RUNTIME_RUNTIMETYPES_H
#define GLOW_RUNTIME_RUNTIMETYPES_H

#include "glow/Backends/BackendUtils.h"
#include "glow/Graph/Graph.h"

#include <string>
#include <unordered_map>
#include <vector>

namespace glow {
namespace runtime {

/// Identifier of a device; used for DAGNode::deviceID and
/// DAGNode::logicalDevice.
using DeviceIDTy = size_t;
/// Identifier of a single run; returned by DeviceManager::runFunction and
/// passed back to the ResultCBTy callback.
using RunIdentifierTy = size_t;

/// Result codes handed to the ready/result callbacks when talking to a device,
/// both while loading networks and while running functions.
enum ResultCode {
  /// Reported through the ready callback once a network was added.
  Ready,
  /// Reported through the result callback once a function has been run.
  Executed,
  /// Reported through either callback when the request could not be served.
  Failed,
  /// The request was cancelled.
  Cancelled,
};

/// Data structure that contains device constraint information for each device.
/// Used to communicate memory constraints and later costs to the Partitioner.
struct DeviceInfo {
/// Available memory on device in bytes.
/// NOTE: there is no default member initializer, so the value is
/// indeterminate unless the DeviceInfo is value-initialized or the field is
/// assigned explicitly.
uint64_t availableMemory;
};

/// Individual Node in the DAG for a given network. This contains all the
/// information needed to run the sub-network at inference time.
/// NOTE(review): children/parents are raw DAGNode pointers; which object owns
/// the nodes is not visible from this header - confirm before storing them.
/// NOTE: deviceID and logicalDevice have no default member initializers, so
/// they are indeterminate in a default-initialized DAGNode.
struct DAGNode {
/// The children of this node, these are nodes that depend on the current
/// node.
std::vector<DAGNode *> children;
/// Pointers to the parents of this node. This is used by the executor for
/// determining if a given node has all dependencies met.
std::vector<DAGNode *> parents;
/// ID of the deviceManager that this network is assigned to.
DeviceIDTy deviceID;
/// The logicalDevice is an output of the Partitioner to indicate that two
/// networks should be assigned to the same device.
DeviceIDTy logicalDevice;
/// Name assigned to the sub-network, this is the id that will be passed to
/// the DeviceManager when requesting a run of the network.
std::string name;
/// Runtime bundle containing all the symbol information for this network at
/// runtime.
RuntimeBundle runtimeBundle;
};

} // namespace runtime
} // namespace glow
#endif // GLOW_RUNTIME_RUNTIMETYPES_H
9 changes: 9 additions & 0 deletions lib/Backends/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,12 @@ target_link_libraries(Backends
${linked_backends}
Base
Graph)

# Library built from QueueBackedDeviceManager.cpp; its dependencies are linked
# PRIVATE, so they are not propagated to targets that link DeviceManager.
add_library(DeviceManager QueueBackedDeviceManager.cpp)

target_link_libraries(DeviceManager
PRIVATE
Backends
Graph
ThreadPool)

14 changes: 14 additions & 0 deletions lib/Backends/CPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,17 @@ if(LLVM_VERSION_MAJOR VERSION_GREATER 6)
LLVMOrcJIT)
endif()
add_dependencies(CPUBackend CPURuntime)

# Library built from CPUDeviceManager.cpp. It links the DeviceManager library
# and the CPU backend; all dependencies are PRIVATE and therefore not
# propagated to targets that link CPUDeviceManager.
add_library(CPUDeviceManager
CPUDeviceManager.cpp)
target_link_libraries(CPUDeviceManager
PRIVATE
Backends
BackendUtils
Base
CodeGen
CPUBackend
DeviceManager
Graph
IR
Optimizer)
103 changes: 103 additions & 0 deletions lib/Backends/CPU/CPUDeviceManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/**
* Copyright (c) 2017-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "CPUDeviceManager.h"

using namespace glow;
using namespace glow::runtime;

/// \returns the memory limit (in bytes) configured for this CPU device.
uint64_t CPUDeviceManager::getMaximumMemory() {
  return maxMemoryBytes;
}

/// \returns the number of bytes still unclaimed on this device, i.e. the
/// memory limit minus the memory accounted to loaded networks.
uint64_t CPUDeviceManager::getAvailableMemory() {
  const auto unclaimedBytes = maxMemoryBytes - usedMemoryBytes;
  return unclaimedBytes;
}

/// \returns true if a Module whose constants are estimated to need
/// \p estimate bytes is expected to fit into the memory still available on
/// this device.
bool CPUDeviceManager::isMemoryAvailable(uint64_t estimate) {
  // No fuzz factor for the CPU device.
  const uint64_t limit = maxMemoryBytes;
  const uint64_t used = usedMemoryBytes;
  // Compare against the remaining headroom instead of computing
  // `used + estimate`, which wraps around for very large estimates and would
  // wrongly report that the memory is available.
  return used <= limit && estimate <= limit - used;
}

void CPUDeviceManager::addNetworkImpl(const Module *module,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be Module or Function? We are loading a specific function to be executed. Np?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're loading a set of functions (which we expect to share constants, e.g. the same graph with different batch sizes). The FunctionMap contains CompiledFunctions, but to collect the constants we need access back to the Placeholders which are owned by the module.

FunctionMapTy functions,
ReadyCBTy readyCB) {
auto modIt = modules_.find(module);
if (modIt != modules_.end()) {
// Already have a module with this ID.
// TODO: should we replace it?
readyCB(module, Failed);
return;
}

// TODO: we should update usedMemory but we don't currently have a nice way
// to determine the memory used by the module. I'll come back to this, but for
// now we'll guess (badly).
size_t moduleSize = 200 * 1024 * 1024;

if (usedMemoryBytes + moduleSize > maxMemoryBytes) {
readyCB(module, Failed);
return;
}

// Add to the function name lookup map.
for (const auto &func : functions) {
// TODO: collect constants here when available.
functions_.emplace(func.first, func.second);
}

modules_.emplace_hint(modIt, module, std::move(functions));
usedMemoryBytes += moduleSize;

// Fire the ready CB.
readyCB(module, Ready);
}

/// Unregister \p module from this device: drop the name lookup entries of all
/// its functions, forget the module and release the memory that was accounted
/// for it. Does nothing if \p module is not currently loaded.
void CPUDeviceManager::evictNetworkImpl(const Module *module) {
  auto modIt = modules_.find(module);
  if (modIt == modules_.end()) {
    // Nothing to do.
    return;
  }

  // Drop every function of this module from the name lookup map before the
  // module entry (which owns the list of names) is erased.
  for (const auto &func : modIt->second) {
    functions_.erase(func.first);
  }
  modules_.erase(modIt);

  // Must match the size that was accounted when the module was added.
  constexpr uint64_t moduleSize = 200 * 1024 * 1024; // TODO: static moduleSize
  // Check before subtracting: testing `>= 0` afterwards can never fail for an
  // unsigned counter, so an underflow would go unnoticed.
  assert(usedMemoryBytes >= moduleSize &&
         "evicting more memory than is accounted as used");
  usedMemoryBytes -= moduleSize;
}

/// Run the function registered under the name \p function with \p ctx and
/// report the outcome through \p resultCB, tagged with the run identifier
/// \p id. \p ctx is always handed back to the callback: together with Failed
/// when no function of that name is registered, otherwise together with
/// Executed after the function has run.
void CPUDeviceManager::runFunctionImpl(RunIdentifierTy id, std::string function,
                                       std::unique_ptr<Context> ctx,
                                       ResultCBTy resultCB) {
  const auto entry = functions_.find(function);
  const bool isRegistered = entry != functions_.end();

  if (isRegistered) {
    CompiledFunction *compiled = entry->second;

    // Full run sequence: set up, bind the context, execute, collect the
    // results into the context, tear down.
    compiled->setupRuns();
    compiled->beforeRun(*ctx);
    compiled->execute();
    compiled->afterRun(*ctx);
    compiled->tearDownRuns();

    // Hand the context back to the caller.
    resultCB(id, Executed, std::move(ctx));
  } else {
    // Unknown function name: report the failure and return the context.
    resultCB(id, Failed, std::move(ctx));
  }
}
Loading