diff --git a/docs/md/lb-manager.md b/docs/md/lb-manager.md index 9f9a069c41..71ab85e4fe 100644 --- a/docs/md/lb-manager.md +++ b/docs/md/lb-manager.md @@ -13,8 +13,10 @@ To enable load balancing, the cmake flag \code{.cmake} -Dvt_lb_enabled=1 instrumentation of work and communication performed by collection elements. To run a load balancer at runtime: - - Pass `--vt_lb --vt_lb_name=` as a command line argument - - Write a LB specification file `--vt_lb --vt_lb_file_name=` + +- Pass `--vt_lb --vt_lb_name=` as a command line argument +- Write a LB specification file `--vt_lb --vt_lb_file_name=` +- One can also pass `--vt_lb_self_migration` as a command line argument to allow load balancer to migrate objects to the same node \section lb-specification-file LB Specification File diff --git a/src/vt/collective/collective_ops.cc b/src/vt/collective/collective_ops.cc index d950f992d9..967611865e 100644 --- a/src/vt/collective/collective_ops.cc +++ b/src/vt/collective/collective_ops.cc @@ -137,6 +137,7 @@ void printOverwrittens( printIfOverwritten(vt_lb_data_file); printIfOverwritten(vt_lb_data_dir_in); printIfOverwritten(vt_lb_data_file_in); + printIfOverwritten(vt_lb_self_migration); printIfOverwritten(vt_help_lb_args); printIfOverwritten(vt_no_detect_hang); printIfOverwritten(vt_print_no_progress); diff --git a/src/vt/configs/arguments/app_config.h b/src/vt/configs/arguments/app_config.h index 39f7cde1bd..4cf2fa5aae 100644 --- a/src/vt/configs/arguments/app_config.h +++ b/src/vt/configs/arguments/app_config.h @@ -135,21 +135,22 @@ struct AppConfig { bool vt_trace_event_polling = false; bool vt_trace_irecv_polling = false; - bool vt_lb = false; - bool vt_lb_show_spec = false; - bool vt_lb_quiet = false; - std::string vt_lb_file_name = ""; - std::string vt_lb_name = "NoLB"; - std::string vt_lb_args = ""; - int32_t vt_lb_interval = 1; - bool vt_lb_keep_last_elm = false; - bool vt_lb_data = false; - bool vt_lb_data_compress = true; + bool vt_lb = false; + bool vt_lb_show_spec = false; + bool vt_lb_quiet = false; + std::string vt_lb_file_name = ""; + std::string vt_lb_name = "NoLB"; + std::string vt_lb_args = ""; + int32_t vt_lb_interval = 1; + bool vt_lb_keep_last_elm = false; + bool vt_lb_data = false; + bool vt_lb_data_compress = true; std::string vt_lb_data_dir = "vt_lb_data"; std::string vt_lb_data_file = "data.%p.json"; std::string vt_lb_data_dir_in = "vt_lb_data_in"; std::string vt_lb_data_file_in = "data.%p.json"; - bool vt_help_lb_args = false; + bool vt_help_lb_args = false; + bool vt_lb_self_migration = false; bool vt_no_detect_hang = false; bool vt_print_no_progress = true; @@ -314,6 +315,7 @@ struct AppConfig { | vt_lb_data_dir_in | vt_lb_data_file_in | vt_help_lb_args + | vt_lb_self_migration | vt_no_detect_hang | vt_print_no_progress diff --git a/src/vt/configs/arguments/args.cc b/src/vt/configs/arguments/args.cc index 446b6a067e..9fb4ce6e84 100644 --- a/src/vt/configs/arguments/args.cc +++ b/src/vt/configs/arguments/args.cc @@ -475,27 +475,28 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto lb_data_file = "Load balancing data output file name"; auto lb_data_dir_in = "Load balancing data input directory"; auto lb_data_file_in = "Load balancing data input file name"; + auto lb_self_migration = "Allow load balancer to migrate objects to the same node"; auto lbn = "NoLB"; auto lbi = 1; auto lbf = ""; auto lbd = "vt_lb_data"; auto lbs = "data"; auto lba = ""; - auto s = app.add_flag("--vt_lb", appConfig.vt_lb, lb); - auto t1 = app.add_flag("--vt_lb_quiet", appConfig.vt_lb_quiet, lb_quiet); - auto u = app.add_option("--vt_lb_file_name", appConfig.vt_lb_file_name, lb_file_name, lbf) - ->check(CLI::ExistingFile); - auto u1 = app.add_flag("--vt_lb_show_spec", appConfig.vt_lb_show_spec, lb_show_spec); - auto v = app.add_option("--vt_lb_name", appConfig.vt_lb_name, lb_name, lbn); - auto v1 = app.add_option("--vt_lb_args", appConfig.vt_lb_args, lb_args, lba); - auto w = app.add_option("--vt_lb_interval", appConfig.vt_lb_interval, lb_interval, lbi); - auto wl = app.add_flag("--vt_lb_keep_last_elm", appConfig.vt_lb_keep_last_elm, lb_keep_last_elm); - auto ww = app.add_flag("--vt_lb_data", appConfig.vt_lb_data, lb_data); - auto xz = app.add_flag("--vt_lb_data_compress", appConfig.vt_lb_data_compress, lb_data_comp); - auto wx = app.add_option("--vt_lb_data_dir", appConfig.vt_lb_data_dir, lb_data_dir, lbd); - auto wy = app.add_option("--vt_lb_data_file", appConfig.vt_lb_data_file, lb_data_file,lbs); - auto xx = app.add_option("--vt_lb_data_dir_in", appConfig.vt_lb_data_dir_in, lb_data_dir_in, lbd); - auto xy = app.add_option("--vt_lb_data_file_in", appConfig.vt_lb_data_file_in, lb_data_file_in,lbs); + auto s = app.add_flag("--vt_lb", appConfig.vt_lb, lb); + auto t1 = app.add_flag("--vt_lb_quiet", appConfig.vt_lb_quiet, lb_quiet); + auto u = app.add_option("--vt_lb_file_name", appConfig.vt_lb_file_name, lb_file_name, lbf)->check(CLI::ExistingFile); + auto u1 = app.add_flag("--vt_lb_show_spec", appConfig.vt_lb_show_spec, lb_show_spec); + auto v = app.add_option("--vt_lb_name", appConfig.vt_lb_name, lb_name, lbn); + auto v1 = app.add_option("--vt_lb_args", appConfig.vt_lb_args, lb_args, lba); + auto w = app.add_option("--vt_lb_interval", appConfig.vt_lb_interval, lb_interval, lbi); + auto wl = app.add_flag("--vt_lb_keep_last_elm", appConfig.vt_lb_keep_last_elm, lb_keep_last_elm); + auto ww = app.add_flag("--vt_lb_data", appConfig.vt_lb_data, lb_data); + auto xz = app.add_flag("--vt_lb_data_compress", appConfig.vt_lb_data_compress, lb_data_comp); + auto wx = app.add_option("--vt_lb_data_dir", appConfig.vt_lb_data_dir, lb_data_dir, lbd); + auto wy = app.add_option("--vt_lb_data_file", appConfig.vt_lb_data_file, lb_data_file,lbs); + auto xx = app.add_option("--vt_lb_data_dir_in", appConfig.vt_lb_data_dir_in, lb_data_dir_in, lbd); + auto xy = app.add_option("--vt_lb_data_file_in", appConfig.vt_lb_data_file_in, lb_data_file_in, lbs); + auto lbasm = app.add_option("--vt_lb_self_migration", appConfig.vt_lb_self_migration, lb_self_migration); auto debugLB = "Load Balancing"; s->group(debugLB); @@ -512,6 +513,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { xx->group(debugLB); xy->group(debugLB); xz->group(debugLB); + lbasm->group(debugLB); // help options deliberately omitted from the debugLB group above so that // they appear grouped with --vt_help when --vt_help is used diff --git a/src/vt/messaging/active.cc b/src/vt/messaging/active.cc index 29f277748a..e8a6a0e46a 100644 --- a/src/vt/messaging/active.cc +++ b/src/vt/messaging/active.cc @@ -217,9 +217,10 @@ EventType ActiveMessenger::sendMsgBytesWithPut( TagType const& send_tag ) { auto msg = base.get(); - auto const& is_term = envelopeIsTerm(msg->env); - auto const& is_put = envelopeIsPut(msg->env); - auto const& is_put_packed = envelopeIsPackedPutType(msg->env); + auto const is_term = envelopeIsTerm(msg->env); + auto const is_put = envelopeIsPut(msg->env); + auto const is_put_packed = envelopeIsPackedPutType(msg->env); + auto const is_bcast = envelopeIsBcast(msg->env); if (!is_term || vt_check_enabled(print_term_msgs)) { vt_debug_print( @@ -229,6 +230,13 @@ EventType ActiveMessenger::sendMsgBytesWithPut( ); } + vtWarnIf( + dest == theContext()->getNode() && + not is_bcast && + not theConfig()->vt_lb_self_migration, + fmt::format("Destination {} should != this node", dest) + ); + MsgSizeType new_msg_size = base.size(); if (is_put && !is_put_packed) { diff --git a/src/vt/runtime/runtime_banner.cc b/src/vt/runtime/runtime_banner.cc index 0cf7a6581e..02c9e820b5 100644 --- a/src/vt/runtime/runtime_banner.cc +++ b/src/vt/runtime/runtime_banner.cc @@ -290,9 +290,15 @@ void Runtime::printStartupBanner() { if (getAppConfig()->vt_lb) { auto f9 = opt_on("--vt_lb", "Load balancing enabled"); fmt::print("{}\t{}{}", vt_pre, f9, reset); + + auto f10 = opt_on("--vt_lb_self_migration", "Self migration enabled"); + fmt::print("{}\t{}{}", vt_pre, f10, reset); + if (getAppConfig()->vt_lb_file_name != "") { - auto f12 = fmt::format("Reading LB specification from file \"{}\"", - getAppConfig()->vt_lb_file_name); + auto f12 = fmt::format( + "Reading LB specification from file \"{}\"", + getAppConfig()->vt_lb_file_name + ); auto f11 = opt_on("--vt_lb_file_name", f12); fmt::print("{}\t{}{}", vt_pre, f11, reset); diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 6e1b0fe99d..53a198211b 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -227,19 +227,11 @@ void BaseLB::notifyNewHostNodeOfObjectsArriving( } void BaseLB::migrateObjectTo(ObjIDType const obj_id, NodeType const to) { - if (obj_id.curr_node != to) { - migrateObject(obj_id, to); + if (obj_id.curr_node != to || theConfig()->vt_lb_self_migration) { + transfers_.push_back(TransferDestType{obj_id, to}); } } -void BaseLB::migrateObjectToSelf(const ObjIDType obj_id) { - migrateObject(obj_id, obj_id.curr_node); -} - -void BaseLB::migrateObject(const ObjIDType obj_id, const NodeType to) { - transfers_.push_back(TransferDestType{obj_id, to}); -} - void BaseLB::finalize(CountMsg* msg) { auto global_count = msg->getVal(); if (migration_count_cb_) { diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h index 6835ccf071..5ba85dac49 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.h +++ b/src/vt/vrt/collection/balance/baselb/baselb.h @@ -128,12 +128,6 @@ struct BaseLB { void migrationDone(); void migrateObjectTo(ObjIDType const obj_id, NodeType const node); - /* - * This function migrates object from and to the same node. It is used - * by SerdeTestLB for purpose of testing serialization and deserialization. - */ - void migrateObjectToSelf(ObjIDType const obj_id); - void transferSend(NodeType from, TransferVecType const& transfer); void transferMigrations(TransferMsg* msg); void finalize(CountMsg* msg); @@ -168,8 +162,6 @@ struct BaseLB { */ std::shared_ptr normalizeReassignments(); - void migrateObject(ObjIDType const obj_id, NodeType const node); - private: TransferVecType transfers_ = {}; TransferType off_node_migrate_ = {}; diff --git a/src/vt/vrt/collection/balance/serdetestlb/serdetestlb.cc b/src/vt/vrt/collection/balance/serdetestlb/serdetestlb.cc index 46aa9085bb..6566bd6e83 100644 --- a/src/vt/vrt/collection/balance/serdetestlb/serdetestlb.cc +++ b/src/vt/vrt/collection/balance/serdetestlb/serdetestlb.cc @@ -51,7 +51,12 @@ SerdeTestLB::getInputKeysWithHelp() { return std::unordered_map{}; } -void SerdeTestLB::init(objgroup::proxy::Proxy) { } +void SerdeTestLB::init(objgroup::proxy::Proxy) { + vtAssert( + theConfig()->vt_lb_self_migration, + "SerdeTestLB::init(): vt_lb_allow_self_migration flag must be set to use SerdeTestLB\n" + ); +} void SerdeTestLB::inputParams(balance::SpecEntry*) { } diff --git a/src/vt/vrt/collection/manager.impl.h b/src/vt/vrt/collection/manager.impl.h index ff9bee327e..d1e253e18c 100644 --- a/src/vt/vrt/collection/manager.impl.h +++ b/src/vt/vrt/collection/manager.impl.h @@ -1801,96 +1801,104 @@ MigrateStatus CollectionManager::migrateOut( terse, vrt_coll, "migrating from {} to {}\n", this_node, dest ); - auto const& proxy = CollectionProxy(col_proxy).operator()( - idx - ); - auto elm_holder = findElmHolder(col_proxy); - vtAssert( - elm_holder != nullptr, "Element must be registered here" - ); + if (this_node != dest || theConfig()->vt_lb_self_migration) { + auto const& proxy = CollectionProxy(col_proxy).operator()( + idx + ); + auto elm_holder = findElmHolder(col_proxy); + vtAssert( + elm_holder != nullptr, "Element must be registered here" + ); #if vt_check_enabled(runtime_checks) - { + { + bool const exists = elm_holder->exists(idx); + vtAssert( + exists, "Local element must exist here for migration to occur" + ); + } +#endif + bool const exists = elm_holder->exists(idx); - vtAssert( - exists, "Local element must exist here for migration to occur" + if (!exists) { + return MigrateStatus::ElementNotLocal; + } + + vt_debug_print( + verbose, vrt_coll, + "migrateOut: (before remove) holder numElements={}\n", + elm_holder->numElements() ); - } -#endif - bool const exists = elm_holder->exists(idx); - if (!exists) { - return MigrateStatus::ElementNotLocal; - } + if (elm_holder->numElements() == 1 and theConfig()->vt_lb_keep_last_elm) { + vt_debug_print( + normal, vrt_coll, + "migrateOut: do not migrate last element\n" + ); + return MigrateStatus::ElementNotLocal; + } - vt_debug_print( - verbose, vrt_coll, - "migrateOut: (before remove) holder numElements={}\n", - elm_holder->numElements() - ); + auto col_unique_ptr = elm_holder->remove(idx); + auto& typed_col_ref = *static_cast(col_unique_ptr.get()); - if (elm_holder->numElements() == 1 and theConfig()->vt_lb_keep_last_elm) { vt_debug_print( - normal, vrt_coll, - "migrateOut: do not migrate last element\n" + verbose, vrt_coll, + "migrateOut: (after remove) holder numElements={}\n", + elm_holder->numElements() ); - return MigrateStatus::ElementNotLocal; - } - auto col_unique_ptr = elm_holder->remove(idx); - auto& typed_col_ref = *static_cast(col_unique_ptr.get()); + /* + * Invoke the virtual prelude migrate out function + */ + col_unique_ptr->preMigrateOut(); - vt_debug_print( - verbose, vrt_coll, - "migrateOut: (after remove) holder numElements={}\n", - elm_holder->numElements() - ); + vt_debug_print( + verbose, vrt_coll, + "migrateOut: col_proxy={:x}, idx={}, dest={}: serializing collection elm\n", + col_proxy, print_index(idx), dest + ); - /* - * Invoke the virtual prelude migrate out function - */ - col_unique_ptr->preMigrateOut(); + using MigrateMsgType = MigrateMsg; - vt_debug_print( - verbose, vrt_coll, - "migrateOut: col_proxy={:x}, idx={}, dest={}: serializing collection elm\n", - col_proxy, print_index(idx), dest - ); + auto msg = makeMessage(proxy, this_node, dest, &typed_col_ref); - using MigrateMsgType = MigrateMsg; + theMsg()->sendMsg< + MigrateMsgType, MigrateHandlers::migrateInHandler + >(dest, msg); - auto msg = makeMessage(proxy, this_node, dest, &typed_col_ref); + theLocMan()->getCollectionLM(col_proxy)->entityEmigrated(idx, dest); - theMsg()->sendMsg< - MigrateMsgType, MigrateHandlers::migrateInHandler - >(dest, msg); + /* + * Invoke the virtual epilog migrate out function + */ + col_unique_ptr->epiMigrateOut(); - theLocMan()->getCollectionLM(col_proxy)->entityEmigrated(idx, dest); + vt_debug_print( + verbose, vrt_coll, + "migrateOut: col_proxy={:x}, idx={}, dest={}: invoking destroy()\n", + col_proxy, print_index(idx), dest + ); - /* - * Invoke the virtual epilog migrate out function - */ - col_unique_ptr->epiMigrateOut(); + /* + * Invoke the virtual destroy function and then null std::unique_ptr, + * which should cause the destructor to fire + */ + col_unique_ptr->destroy(); + col_unique_ptr = nullptr; - vt_debug_print( - verbose, vrt_coll, - "migrateOut: col_proxy={:x}, idx={}, dest={}: invoking destroy()\n", - col_proxy, print_index(idx), dest - ); + auto const home_node = getMappedNode(col_proxy, idx); + elm_holder->applyListeners( + listener::ElementEventEnum::ElementMigratedOut, idx, home_node + ); - /* - * Invoke the virtual destroy function and then null std::unique_ptr, - * which should cause the destructor to fire - */ - col_unique_ptr->destroy(); - col_unique_ptr = nullptr; - - auto const home_node = getMappedNode(col_proxy, idx); - elm_holder->applyListeners( - listener::ElementEventEnum::ElementMigratedOut, idx, home_node - ); + return MigrateStatus::MigratedToRemote; + } + +#if vt_check_enabled(runtime_checks) + vtAssert(false, "Migration should only be called when to_node is != this_node"); +#endif - return MigrateStatus::MigratedToRemote; + return MigrateStatus::NoMigrationNecessary; } template