
Commit

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into support_mutable_attributes
Charles-hit committed Aug 31, 2023
2 parents 8bb28a3 + 51ba2a0 commit 8cc333a
Showing 442 changed files with 9,343 additions and 4,688 deletions.
6 changes: 3 additions & 3 deletions .clang-tidy
@@ -83,7 +83,7 @@ clang-analyzer-cplusplus.InnerPointer,
-clang-analyzer-nullability.NullableDereferenced,
-clang-analyzer-nullability.NullablePassedToNonnull,
-clang-analyzer-nullability.NullableReturnedFromNonnull,
-clang-analyzer-optin.cplusplus.UninitializedObject,
clang-analyzer-optin.cplusplus.UninitializedObject,
-clang-analyzer-optin.cplusplus.VirtualCall,
-clang-analyzer-optin.mpi.MPI-Checker,
-clang-analyzer-optin.osx.OSObjectCStyleCast,
@@ -162,15 +162,15 @@ cppcoreguidelines-c-copy-assignment-signature,
-cppcoreguidelines-pro-type-member-init,
-cppcoreguidelines-slicing,
-hicpp-avoid-goto,
-hicpp-exception-baseclass,
hicpp-exception-baseclass,
misc-unused-alias-decls,
misc-unused-using-decls,
modernize-avoid-bind,
modernize-avoid-c-arrays,
-modernize-deprecated-headers,
-modernize-deprecated-ios-base-aliases,
modernize-loop-convert,
-modernize-make-shared,
modernize-make-shared,
modernize-make-unique,
-modernize-pass-by-value,
modernize-raw-string-literal,
1 change: 1 addition & 0 deletions .gitignore
@@ -98,3 +98,4 @@ paddle/phi/kernels/fusion/cutlass/conv2d/generated/*
python/paddle/fluid/incubate/fleet/parameter_server/pslib/ps_pb2.py
paddle/fluid/ir_adaptor/translator/op_compat_info.cc
paddle/fluid/pybind/static_op_function.*
paddle/fluid/pybind/ops_api.cc
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -254,6 +254,7 @@ option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation"
ON)
option(WITH_CPP_DIST "Install PaddlePaddle C++ distribution" OFF)
option(WITH_GFLAGS "Compile PaddlePaddle with gflags support" OFF)
################################ Internal Configurations #######################################
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
10 changes: 10 additions & 0 deletions cmake/cinn.cmake
@@ -183,6 +183,11 @@ if(WITH_MKL)
endif()
endif()

if(NOT WITH_GFLAGS)
target_link_libraries(cinnapi gflags)
add_dependencies(cinnapi gflags)
endif()

if(WITH_GPU)
target_link_libraries(
cinnapi
@@ -237,6 +242,11 @@ function(gen_cinncore LINKTYPE)
endif()
endif()

if(NOT WITH_GFLAGS)
target_link_libraries(${CINNCORE_TARGET} gflags)
add_dependencies(${CINNCORE_TARGET} gflags)
endif()

if(WITH_GPU)
target_link_libraries(
${CINNCORE_TARGET}
4 changes: 4 additions & 0 deletions cmake/configure.cmake
@@ -201,6 +201,10 @@ if(WITH_DISTRIBUTE)
add_definitions(-DPADDLE_WITH_DISTRIBUTE)
endif()

if(WITH_GFLAGS)
add_definitions(-DPADDLE_WITH_GFLAGS)
endif()

if(WITH_PSCORE)
add_definitions(-DPADDLE_WITH_PSCORE)
endif()
13 changes: 13 additions & 0 deletions cmake/external/brpc.cmake
@@ -91,3 +91,16 @@ add_dependencies(brpc extern_brpc)
add_definitions(-DBRPC_WITH_GLOG)

list(APPEND external_project_dependencies brpc)

set(EXTERNAL_BRPC_DEPS
brpc
protobuf
ssl
crypto
leveldb
glog
snappy)

if(NOT WITH_GFLAGS)
set(EXTERNAL_BRPC_DEPS ${EXTERNAL_BRPC_DEPS} gflags)
endif()
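
The EXTERNAL_BRPC_DEPS list defined above is consumed by later hunks in this commit (fleet_executor and ps/service CMakeLists). A minimal sketch of that consumption pattern, using a hypothetical target and source file that are not part of this commit:

# Hypothetical consumer; mirrors set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi) used later in this commit.
set(MY_BRPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi)
add_library(my_rpc_component STATIC my_rpc_component.cc)  # illustrative target only
target_link_libraries(my_rpc_component ${MY_BRPC_DEPS})
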
11 changes: 11 additions & 0 deletions cmake/external/gflags.cmake
@@ -102,3 +102,14 @@ if(WIN32)
set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
endif()
endif()

# We have implemented a custom flags tool paddle_flags to replace gflags.
# User can also choose to use gflags by setting WITH_GFLAGS=ON. But when
# using paddle_flags, gflags is also needed for other third party libraries
# including glog and brpc. So we can not remove gflags completely.
set(flags_dep)
if(WITH_GFLAGS)
list(APPEND flags_dep gflags)
else()
list(APPEND flags_dep paddle_flags)
endif()
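
The comment above explains the intent of flags_dep: targets depend on it instead of naming gflags or paddle_flags directly, so the single WITH_GFLAGS switch chooses the flags backend in one place. A minimal sketch of such a consumer, assuming a hypothetical target that is not part of this commit:

# Hypothetical target that reads Paddle flags; it links ${flags_dep}, which
# resolves to gflags when WITH_GFLAGS=ON and to paddle_flags otherwise.
add_library(my_flag_consumer STATIC my_flag_consumer.cc)  # illustrative target only
target_link_libraries(my_flag_consumer ${flags_dep})
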
11 changes: 11 additions & 0 deletions cmake/inference_lib.cmake
@@ -336,11 +336,22 @@ copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/)

if(NOT WITH_GFLAGS)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags_native.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
endif()

# the include path of phi needs to be changed to adapt to inference api path
add_custom_command(
TARGET inference_lib_dist
1 change: 1 addition & 0 deletions paddle/cinn/hlir/dialect/.gitignore
@@ -1 +1,2 @@
generated/**
generated/*
4 changes: 0 additions & 4 deletions paddle/fluid/distributed/auto_parallel/spmd_rules/rules.h
@@ -19,7 +19,6 @@
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/elementwise_spmd_rule.h"
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/embedding_spmd_rule.h"
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/layer_norm_spmd_rule.h"
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/matmul_spmd_rule.h"
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/reduction_spmd_rule.h"
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/replicated_spmd_rule.h"
#include "paddle/fluid/distributed/auto_parallel/spmd_rules/reshape_spmd_rule.h"
@@ -32,9 +31,6 @@ namespace paddle {
namespace distributed {
namespace auto_parallel {

// matmul rule
REGISTER_SPMD_RULE(matmul, MatmulSPMDRule);

// reduction rules
REGISTER_SPMD_RULE(all, ReductionSPMDRule);
REGISTER_SPMD_RULE(amax, ReductionSPMDRule);
@@ -27,7 +27,7 @@

constexpr int64_t kWaitBlockTImeout = 10;

DECLARE_bool(use_stream_safe_cuda_allocator);
PD_DECLARE_bool(use_stream_safe_cuda_allocator);

namespace paddle {
namespace distributed {
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/collective/process_group_nccl.cc
@@ -29,7 +29,7 @@
#include "paddle/phi/core/distributed/comm_context_manager.h"

PHI_DECLARE_bool(nccl_blocking_wait);
DECLARE_bool(use_stream_safe_cuda_allocator);
PD_DECLARE_bool(use_stream_safe_cuda_allocator);

// set this flag to `true` and recompile to enable dynamic checks
constexpr bool FLAGS_enable_nccl_dynamic_check = false;
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/collective/reducer.cc
@@ -18,7 +18,7 @@
#include "paddle/phi/backends/device_manager.h"
#include "paddle/phi/core/flags.h"

DECLARE_bool(use_stream_safe_cuda_allocator);
PD_DECLARE_bool(use_stream_safe_cuda_allocator);
PHI_DECLARE_string(allocator_strategy);

namespace paddle {
11 changes: 1 addition & 10 deletions paddle/fluid/distributed/fleet_executor/CMakeLists.txt
@@ -7,16 +7,7 @@ proto_library(interceptor_message_proto SRCS interceptor_message.proto)
if(WITH_ARM_BRPC)
set(BRPC_DEPS arm_brpc snappy phi glog)
elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB)
set(BRPC_DEPS
brpc
ssl
crypto
protobuf
zlib
leveldb
snappy
phi
glog)
set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} zlib phi)
else()
set(BRPC_DEPS "")
endif()
2 changes: 1 addition & 1 deletion paddle/fluid/distributed/fleet_executor/carrier.cc
@@ -17,7 +17,6 @@
#include <algorithm>
#include <vector>

#include "gflags/gflags.h"
#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
@@ -29,6 +28,7 @@
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/flags.h"
#include "paddle/utils/flags.h"
PADDLE_DEFINE_EXPORTED_bool(
fleet_executor_with_standalone,
false,
27 changes: 2 additions & 25 deletions paddle/fluid/distributed/ps/service/CMakeLists.txt
@@ -3,34 +3,11 @@ set_source_files_properties(${BRPC_SRCS})

if(WITH_HETERPS)

set(BRPC_DEPS
brpc
ssl
crypto
protobuf
phi
glog
zlib
leveldb
snappy
glog
device_context
rocksdb)
set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi zlib device_context rocksdb)

else()

set(BRPC_DEPS
brpc
ssl
crypto
protobuf
phi
glog
zlib
leveldb
snappy
glog
device_context)
set(BRPC_DEPS ${EXTERNAL_BRPC_DEPS} phi zlib device_context)

endif()

79 changes: 41 additions & 38 deletions paddle/fluid/distributed/ps/service/brpc_ps_client.cc
@@ -34,49 +34,53 @@ class Variable;
namespace paddle {
namespace distributed {

DEFINE_int32(pserver_push_dense_merge_limit,
12,
"limit max push_dense local merge requests");
PD_DEFINE_int32(pserver_push_dense_merge_limit,
12,
"limit max push_dense local merge requests");

DEFINE_int32(pserver_push_sparse_merge_limit,
12,
"limit max push_sparse local merge requests");
PD_DEFINE_int32(pserver_push_sparse_merge_limit,
12,
"limit max push_sparse local merge requests");

DEFINE_int32(pserver_pull_dense_limit,
12,
"limit max push_sparse local merge requests");
PD_DEFINE_int32(pserver_pull_dense_limit,
12,
"limit max push_sparse local merge requests");

DEFINE_int32(pserver_async_push_dense_interval_ms,
10,
"async push_dense to server interval");
PD_DEFINE_int32(pserver_async_push_dense_interval_ms,
10,
"async push_dense to server interval");

DEFINE_int32(pserver_async_push_sparse_interval_ms,
10,
"async push_sparse to server interval");
PD_DEFINE_int32(pserver_async_push_sparse_interval_ms,
10,
"async push_sparse to server interval");

DEFINE_bool(pserver_scale_gradient_by_merge,
false,
"scale dense gradient when merged");
PD_DEFINE_bool(pserver_scale_gradient_by_merge,
false,
"scale dense gradient when merged");

DEFINE_int32(pserver_communicate_compress_type,
0,
"none:0 snappy:1 gzip:2 zlib:3 lz4:4");
PD_DEFINE_int32(pserver_communicate_compress_type,
0,
"none:0 snappy:1 gzip:2 zlib:3 lz4:4");

DEFINE_int32(pserver_max_async_call_num,
13,
"max task num in async_call_server");
PD_DEFINE_int32(pserver_max_async_call_num,
13,
"max task num in async_call_server");

DEFINE_int32(pserver_timeout_ms, 500000, "pserver request server timeout_ms");
PD_DEFINE_int32(pserver_timeout_ms,
500000,
"pserver request server timeout_ms");

DEFINE_int32(pserver_connect_timeout_ms,
10000,
"pserver connect server timeout_ms");
PD_DEFINE_int32(pserver_connect_timeout_ms,
10000,
"pserver connect server timeout_ms");

DEFINE_int32(pserver_sparse_merge_thread, 1, "pserver sparse merge thread num");
PD_DEFINE_int32(pserver_sparse_merge_thread,
1,
"pserver sparse merge thread num");

DEFINE_int32(pserver_sparse_table_shard_num,
1000,
"sparse table shard for save & load");
PD_DEFINE_int32(pserver_sparse_table_shard_num,
1000,
"sparse table shard for save & load");

inline size_t get_sparse_shard(uint32_t shard_num,
uint32_t server_num,
@@ -140,7 +144,7 @@ int32_t BrpcPsClient::StartFlClientService(const std::string &self_endpoint) {

if (_fl_server.Start(self_endpoint.c_str(), &options) != 0) {
VLOG(0) << "fl-ps > StartFlClientService failed. Try again.";
auto ip_port = paddle::string::Split(self_endpoint, ':');
auto ip_port = ::paddle::string::Split(self_endpoint, ':');
std::string ip = ip_port[0];
int port = std::stoi(ip_port[1]);
std::string int_ip_port = GetIntTypeEndpoint(ip, port);
@@ -202,8 +206,7 @@ int32_t BrpcPsClient::InitializeFlWorker(const std::string &self_endpoint) {
options.protocol = "baidu_std";
options.timeout_ms = FLAGS_pserver_timeout_ms;
options.connection_type = "pooled";
options.connect_timeout_ms =
paddle::distributed::FLAGS_pserver_connect_timeout_ms;
options.connect_timeout_ms = FLAGS_pserver_connect_timeout_ms;
options.max_retry = 3;
  // Fetch the coordinator list and connect to it
std::string coordinator_ip_port;
@@ -336,11 +339,11 @@ int32_t BrpcPsClient::Initialize() {
auto table_id = worker_param.downpour_table_param(i).table_id();
if (type == PS_DENSE_TABLE) {
_push_dense_task_queue_map[table_id] =
paddle::framework::MakeChannel<DenseAsyncTask *>();
::paddle::framework::MakeChannel<DenseAsyncTask *>();
}
if (type == PS_SPARSE_TABLE) {
_push_sparse_task_queue_map[table_id] =
paddle::framework::MakeChannel<SparseAsyncTask *>();
::paddle::framework::MakeChannel<SparseAsyncTask *>();
_push_sparse_merge_count_map[table_id] = 0;
}
}
@@ -446,7 +449,7 @@ std::future<int32_t> BrpcPsClient::PrintTableStat(uint32_t table_id) {
int ret = 0;
uint64_t feasign_size = 0;
uint64_t mf_size = 0;
paddle::framework::BinaryArchive ar;
::paddle::framework::BinaryArchive ar;
auto *closure = reinterpret_cast<DownpourBrpcClosure *>(done);
for (size_t i = 0; i < request_call_num; ++i) {
if (closure->check_response(i, PS_PRINT_TABLE_STAT) != 0) {