Skip to content

Commit

Permalink
【静态图性能优化】图依赖信息复用 (PaddlePaddle#55389)
Browse files Browse the repository at this point in the history
* add share api for DependencyBuilder

* add judge codes for sharing build results

* add ShareBuildResultsFrom

* update ShareDependencyFrom

* fix error

* add share codes

* fix memory error

* update according review

* update notes

* fix code style

* remove const_cast

* fix code style
  • Loading branch information
AndSonder authored and wz1qqx committed Jul 31, 2023
1 parent dbc52a2 commit 2731ead
Show file tree
Hide file tree
Showing 10 changed files with 155 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,26 @@ const std::string StringizeDownstreamMap(
return oss.str();
}

DependencyBuilder::DependencyBuilder()
: is_build_(false), instructions_(nullptr) {
op_downstream_map_ = std::make_shared<std::map<size_t, std::set<size_t>>>();
op_happens_before_ = std::make_shared<std::vector<std::vector<bool>>>();
}

const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
const std::vector<Instruction>& instructions) {
if (is_build_) {
return op_downstream_map_;
return *op_downstream_map_;
}

std::tie(op_downstream_map_, op_happens_before_) = GetDependency();

instructions_ = &instructions;
op_num_ = instructions_->size();

ops_before_.assign(op_num_, {});
ops_behind_.assign(op_num_, {});
op_happens_before_.assign(op_num_, std::vector<bool>(op_num_, false));
op_happens_before_->assign(op_num_, std::vector<bool>(op_num_, false));

BuildDownstreamMap();
VLOG(6) << "Finish BuildDownstreamMap";
Expand All @@ -97,13 +105,24 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::Build(
VLOG(6) << "Finish AddDependencyForReadOp";

VLOG(6) << "Finish build dependency";
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
VLOG(8) << "downstream count: " << CountDownstreamMap(*op_downstream_map_);
VLOG(8) << "downstream_map: " << std::endl
<< StringizeDownstreamMap(op_downstream_map_);
<< StringizeDownstreamMap(*op_downstream_map_);

is_build_ = true;

return op_downstream_map_;
return *op_downstream_map_;
}

std::tuple<std::shared_ptr<std::map<size_t, std::set<size_t>>>,
std::shared_ptr<std::vector<std::vector<bool>>>>
DependencyBuilder::GetDependency() const {
return std::make_tuple(op_downstream_map_, op_happens_before_);
}

void DependencyBuilder::ShareDependencyFrom(const DependencyBuilder& src) {
std::tie(op_downstream_map_, op_happens_before_) = src.GetDependency();
is_build_ = true;
}

const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
Expand All @@ -113,7 +132,7 @@ const std::map<size_t, std::set<size_t>>& DependencyBuilder::OpDownstreamMap()
true,
phi::errors::Unavailable(
"DependencyBuilder is not yet built, call Build() firstly."));
return op_downstream_map_;
return *op_downstream_map_;
}

void DependencyBuilder::AddDependencyForCoalesceTensorOp() {
Expand Down Expand Up @@ -268,8 +287,8 @@ void DependencyBuilder::AddDependencyForRandomOp() {
void DependencyBuilder::AddDependencyForReadOp() {
std::vector<bool> is_startup_ops(op_num_, true);
for (size_t op_idx = 0; op_idx < op_num_; ++op_idx) {
auto it = op_downstream_map_.find(op_idx);
if (it != op_downstream_map_.end()) {
auto it = op_downstream_map_->find(op_idx);
if (it != op_downstream_map_->end()) {
for (size_t downstream_op_idx : it->second) {
is_startup_ops[downstream_op_idx] = false;
}
Expand Down Expand Up @@ -320,8 +339,7 @@ void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,
posterior_op_idx,
posterior_op_idx,
prior_op_idx));

std::set<size_t>& downstream_ops = op_downstream_map_[prior_op_idx];
std::set<size_t>& downstream_ops = (*op_downstream_map_)[prior_op_idx];
// NOTE(Ruibiao): Here the downstream map shrinking is best-effort, therefore
// ShrinkDownstreamMap after BuildDownstreamMap is still helpful. For example,
// a->c will not be shrinked in the following case: AddDownstreamOp(a, b) ->
Expand All @@ -342,8 +360,8 @@ void DependencyBuilder::AddDownstreamOp(size_t prior_op_idx,

auto update_op_happen_before = [this](size_t prior_op_idx,
size_t posterior_op_idx) {
if (!op_happens_before_[prior_op_idx][posterior_op_idx]) {
op_happens_before_[prior_op_idx][posterior_op_idx] = true;
if (!(*op_happens_before_)[prior_op_idx][posterior_op_idx]) {
(*op_happens_before_)[prior_op_idx][posterior_op_idx] = true;
ops_before_[posterior_op_idx].push_back(prior_op_idx);
ops_behind_[prior_op_idx].push_back(posterior_op_idx);
}
Expand Down Expand Up @@ -377,8 +395,8 @@ void DependencyBuilder::BuildDownstreamMap() {
std::map<size_t, size_t>(); // # map from variable to recent write op.
auto op2dependences =
std::map<size_t,
std::set<size_t>>(); //# map from op to the dependence list,
// op must run after the dependence.
std::set<size_t>>(); // # map from op to the dependence list,
// op must run after the dependence.
std::set<size_t>
remove_duplicate; // remove the duplicate between inputs and outputs

Expand Down Expand Up @@ -497,15 +515,15 @@ void DependencyBuilder::ShrinkDownstreamMap() {
// shrink, find the downstream op that has no other op in the
// downstream list happens before it
for (size_t i = 0; i < op_num_; ++i) {
if (op_downstream_map_.find(i) == op_downstream_map_.end()) {
if (op_downstream_map_->find(i) == op_downstream_map_->end()) {
continue;
}

std::set<size_t> minumum_nexts;
for (size_t item : op_downstream_map_.at(i)) {
for (size_t item : op_downstream_map_->at(i)) {
bool not_after_any = true;
// find the op that is not executed after any
for (size_t other_item : op_downstream_map_.at(i)) {
for (size_t other_item : op_downstream_map_->at(i)) {
if (OpHappensBefore(other_item, item)) {
VLOG(8) << "happens_before: " << other_item << "->" << item
<< ", so skip " << item;
Expand All @@ -520,12 +538,12 @@ void DependencyBuilder::ShrinkDownstreamMap() {
}
// NOTE(Ruibiao): op_happens_before will not be changed when shrink
// dowstream map
op_downstream_map_.at(i) = minumum_nexts;
(*op_downstream_map_)[i] = minumum_nexts;
}
VLOG(8) << "Finish shrink downstream map";
VLOG(8) << "downstream count: " << CountDownstreamMap(op_downstream_map_);
VLOG(8) << "downstream count: " << CountDownstreamMap(*op_downstream_map_);
VLOG(8) << "downstream_map: " << std::endl
<< StringizeDownstreamMap(op_downstream_map_);
<< StringizeDownstreamMap(*op_downstream_map_);
}

/// ======================== ///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,29 @@ namespace interpreter {

class DependencyBuilder {
public:
DependencyBuilder() : is_build_(false), instructions_(nullptr) {}
DependencyBuilder();

// build op dependencies and return the mapping from op to its downstream-op
// set
const std::map<size_t, std::set<size_t>>& Build(
const std::vector<Instruction>& instructions);

std::tuple<std::shared_ptr<std::map<size_t, std::set<size_t>>>,
std::shared_ptr<std::vector<std::vector<bool>>>>
GetDependency() const;

const std::map<size_t, std::set<size_t>>& OpDownstreamMap() const;

bool OpHappensBefore(size_t prior_op_idx, size_t posterior_op_idx) const {
PADDLE_ENFORCE_GE(
op_happens_before_.size(),
op_happens_before_->size(),
0,
phi::errors::Unavailable("op_happen_before is not yet built"));
return op_happens_before_.at(prior_op_idx).at(posterior_op_idx);
return op_happens_before_->at(prior_op_idx).at(posterior_op_idx);
}

void ShareDependencyFrom(const DependencyBuilder& src);

private:
void AddDependencyForCoalesceTensorOp();
void AddDependencyForCommunicationOp();
Expand All @@ -76,13 +82,13 @@ class DependencyBuilder {
std::vector<std::vector<size_t>> ops_behind_;

// op_downstream_map_ is the mapping from op to its downstream-op set, that is
// to say, op_downstream_map_[i] == {a, b, c} means op[a], op[b] and op[c]
// to say, (*op_downstream_map_)[i] == {a, b, c} means op[a], op[b] and op[c]
// depend on op[i] directly.
std::map<size_t, std::set<size_t>> op_downstream_map_;
std::shared_ptr<std::map<size_t, std::set<size_t>>> op_downstream_map_;

// op_happens_before_ is a matrix form of ops_before_ and ops_behind_, it is
// used to speed up the query.
std::vector<std::vector<bool>> op_happens_before_;
std::shared_ptr<std::vector<std::vector<bool>>> op_happens_before_;
};

// /// ======================== ///
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/framework/new_executor/interpreter_base_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ class InterpreterBaseImpl {

virtual void ShareWorkQueueFrom(InterpreterBaseImpl* src) = 0;

virtual void ShareBuildResultsFrom(const InterpreterBaseImpl& src) = 0;

virtual void SetCopyProgram(std::shared_ptr<ProgramDesc> prog) = 0;

virtual void SetSkipGcVars(const std::set<std::string>& skip_gc_vars) = 0;
Expand All @@ -97,6 +99,11 @@ class InterpreterBaseImpl {
virtual const platform::Place& GetPlace() const = 0;

virtual void SetOutputHooks(const std::vector<HookFunc>& hookfuncs) = 0;

virtual const interpreter::DependencyBuilder& GetDependencyBuilder()
const = 0;

virtual std::shared_ptr<std::vector<size_t>> GetDependencyCount() const = 0;
};

inline void SetDeviceId(const platform::Place& place) {
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/framework/new_executor/interpretercore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ void InterpreterCore::ShareWorkQueueFrom(std::shared_ptr<InterpreterCore> src) {
impl_->ShareWorkQueueFrom(const_cast<InterpreterBaseImpl*>(src->Impl()));
}

void InterpreterCore::ShareBuildResultsFrom(
std::shared_ptr<InterpreterCore> src) {
// ShareBuildResultsFrom required const InterpreterBaseImpl& src as input
impl_->ShareBuildResultsFrom(*src->Impl());
}

void InterpreterCore::SetCopyProgram(std::shared_ptr<ProgramDesc> prog) {
impl_->SetCopyProgram(prog);
}
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/framework/new_executor/interpretercore.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class InterpreterCore {

void ShareWorkQueueFrom(std::shared_ptr<InterpreterCore> src);

void ShareBuildResultsFrom(std::shared_ptr<InterpreterCore> src);

void SetCopyProgram(std::shared_ptr<ProgramDesc> prog);

void SetSkipGcVars(const std::set<std::string>& skip_gc_vars);
Expand Down
18 changes: 18 additions & 0 deletions paddle/fluid/framework/new_executor/new_ir_interpreter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,24 @@ void NewIRInterpreter::ShareWorkQueueFrom(InterpreterBaseImpl* src) {
<< ") to InterpreterCore(" << this << ")";
}

void NewIRInterpreter::ShareBuildResultsFrom(const InterpreterBaseImpl& src) {
PADDLE_THROW(platform::errors::Unimplemented(
"ShareBuildResultsFrom is not implemented in NewIRInterpreter."));
}

// op dependences
const interpreter::DependencyBuilder& NewIRInterpreter::GetDependencyBuilder()
const {
PADDLE_THROW(platform::errors::Unimplemented(
"GetDependencyBuilder is not implemented in NewIRInterpreter."));
}

std::shared_ptr<std::vector<size_t>> NewIRInterpreter::GetDependencyCount()
const {
PADDLE_THROW(platform::errors::Unimplemented(
"GetDependencyCount is not implemented in NewIRInterpreter."));
}

bool NewIRInterpreter::BuildInplaceCheckVarIsOnlyInput(
const std::vector<std::vector<size_t>>& input_var2op, size_t var_index) {
if (!var_scope_.VarDesc(var_index)) {
Expand Down
7 changes: 7 additions & 0 deletions paddle/fluid/framework/new_executor/new_ir_interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ class NewIRInterpreter : public InterpreterBaseImpl {

void ShareWorkQueueFrom(InterpreterBaseImpl* src) override;

void ShareBuildResultsFrom(const InterpreterBaseImpl& src) override;

// op dependences
const interpreter::DependencyBuilder& GetDependencyBuilder() const override;

std::shared_ptr<std::vector<size_t>> GetDependencyCount() const override;

void SetCopyProgram(std::shared_ptr<ProgramDesc> prog) override;

void SetSkipGcVars(const std::set<std::string>& skip_gc_vars) override;
Expand Down
Loading

0 comments on commit 2731ead

Please sign in to comment.