From 9ad79ecdd041fa4f36e0c268f5a1eb0a687e5afa Mon Sep 17 00:00:00 2001 From: "yuexiang.xyx" Date: Wed, 28 Sep 2022 19:54:28 +0800 Subject: [PATCH] minor fix and doc for cross backend fl --- .../core/auxiliaries/trainer_builder.py | 3 +- .../core/proto/gRPC_comm_manager_pb2_grpc.py | 2 +- federatedscope/core/workers/client.py | 11 +++++-- federatedscope/core/workers/server.py | 11 +++++-- federatedscope/cross_backends/README.md | 30 +++++++++++++++++++ federatedscope/cross_backends/__init__.py | 4 +++ .../distributed_tf_client_3.yaml | 24 +++++++++++++++ .../cross_backends/distributed_tf_server.yaml | 22 ++++++++++++++ 8 files changed, 101 insertions(+), 6 deletions(-) create mode 100644 federatedscope/cross_backends/README.md create mode 100644 federatedscope/cross_backends/distributed_tf_client_3.yaml create mode 100644 federatedscope/cross_backends/distributed_tf_server.yaml diff --git a/federatedscope/core/auxiliaries/trainer_builder.py b/federatedscope/core/auxiliaries/trainer_builder.py index c8cd4d5d7..41d3ffe89 100644 --- a/federatedscope/core/auxiliaries/trainer_builder.py +++ b/federatedscope/core/auxiliaries/trainer_builder.py @@ -45,7 +45,8 @@ def get_trainer(model=None, only_for_eval=only_for_eval, monitor=monitor) elif config.backend == 'tensorflow': - from federatedscope.core.trainers import GeneralTFTrainer + from federatedscope.core.trainers.tf_trainer import \ + GeneralTFTrainer trainer = GeneralTFTrainer(model=model, data=data, device=device, diff --git a/federatedscope/core/proto/gRPC_comm_manager_pb2_grpc.py b/federatedscope/core/proto/gRPC_comm_manager_pb2_grpc.py index 85a30225b..940554913 100644 --- a/federatedscope/core/proto/gRPC_comm_manager_pb2_grpc.py +++ b/federatedscope/core/proto/gRPC_comm_manager_pb2_grpc.py @@ -2,7 +2,7 @@ """Client and server classes corresponding to protobuf-defined services.""" import grpc -import federatedscope.core.proto.gRPC_comm_manager_pb2 \ +from federatedscope.core.proto import gRPC_comm_manager_pb2 \ as 
gRPC__comm__manager__pb2 diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py index 2dbc4bdd7..41a1ed980 100644 --- a/federatedscope/core/workers/client.py +++ b/federatedscope/core/workers/client.py @@ -104,8 +104,15 @@ def __init__(self, else: self.comp_speed = None self.comm_bandwidth = None - self.model_size = sys.getsizeof(pickle.dumps( - self.model)) / 1024.0 * 8. # kbits + + if self._cfg.backend == 'torch': + self.model_size = sys.getsizeof(pickle.dumps( + self.model)) / 1024.0 * 8. # kbits + else: + # TODO: calculate model size for TF Model + self.model_size = 1.0 + logger.warning(f'The calculation of model size in backend:' + f'{self._cfg.backend} is not provided.') # Initialize communication manager self.server_id = server_id diff --git a/federatedscope/core/workers/server.py b/federatedscope/core/workers/server.py index 38ceb6435..59c5a514a 100644 --- a/federatedscope/core/workers/server.py +++ b/federatedscope/core/workers/server.py @@ -764,8 +764,15 @@ def trigger_for_start(self): for client_index in np.arange(1, self.client_num + 1) ] else: - model_size = sys.getsizeof(pickle.dumps( - self.model)) / 1024.0 * 8. + if self._cfg.backend == 'torch': + model_size = sys.getsizeof(pickle.dumps( + self.model)) / 1024.0 * 8. + else: + # TODO: calculate model size for TF Model + model_size = 1.0 + logger.warning(f'The calculation of model size in backend:' + f'{self._cfg.backend} is not provided.') + client_resource = [ model_size / float(x['communication']) + float(x['computation']) / 1000. diff --git a/federatedscope/cross_backends/README.md b/federatedscope/cross_backends/README.md new file mode 100644 index 000000000..451243dcb --- /dev/null +++ b/federatedscope/cross_backends/README.md @@ -0,0 +1,30 @@ +## Cross-Backend Federated Learning + +We provide an example for constructing cross-backend (Tensorflow and PyTorch) federated learning, which trains an LR model on the synthetic toy data. 
+ +The server runs with Tensorflow, and clients run with PyTorch (the suggested version of Tensorflow is 1.12.0): +```shell script +# Generate toy data +python ../../scripts/distributed_scripts/gen_data.py +# Server +python ../main.py --cfg distributed_tf_server.yaml + +# Clients +python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml +python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml +python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml +``` + +One of the clients runs with Tensorflow, and the server and other clients run with PyTorch: +```shell script +# Generate toy data +python ../../scripts/distributed_scripts/gen_data.py +# Server +python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_server.yaml + +# Clients with PyTorch +python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml +python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml +# Clients with Tensorflow +python ../main.py --cfg distributed_tf_client_3.yaml +``` diff --git a/federatedscope/cross_backends/__init__.py b/federatedscope/cross_backends/__init__.py index e69de29bb..468829142 100644 --- a/federatedscope/cross_backends/__init__.py +++ b/federatedscope/cross_backends/__init__.py @@ -0,0 +1,4 @@ +from federatedscope.cross_backends.tf_lr import LogisticRegression +from federatedscope.cross_backends.tf_aggregator import FedAvgAggregator + +__all__ = ['LogisticRegression', 'FedAvgAggregator'] diff --git a/federatedscope/cross_backends/distributed_tf_client_3.yaml b/federatedscope/cross_backends/distributed_tf_client_3.yaml new file mode 100644 index 000000000..61792c2f5 --- /dev/null +++ b/federatedscope/cross_backends/distributed_tf_client_3.yaml @@ -0,0 +1,24 @@ +use_gpu: False +backend: 'tensorflow' +federate: 
client_num: 3 + mode: 'distributed' + total_round_num: 20 + make_global_eval: False + online_aggr: False +distribute: + use: True + server_host: '127.0.0.1' + server_port: 50051 + client_host: '127.0.0.1' + client_port: 50054 + role: 'client' + data_file: 'toy_data/client_3_data' +trainer: + type: 'general' +eval: + freq: 10 +data: + type: 'toy' +model: + type: 'lr' \ No newline at end of file diff --git a/federatedscope/cross_backends/distributed_tf_server.yaml b/federatedscope/cross_backends/distributed_tf_server.yaml new file mode 100644 index 000000000..cd1b23c43 --- /dev/null +++ b/federatedscope/cross_backends/distributed_tf_server.yaml @@ -0,0 +1,22 @@ +use_gpu: False +backend: 'tensorflow' +federate: + client_num: 3 + mode: 'distributed' + total_round_num: 20 + make_global_eval: True + online_aggr: False +distribute: + use: True + server_host: '127.0.0.1' + server_port: 50051 + role: 'server' + data_file: 'toy_data/server_data' +trainer: + type: 'general' +eval: + freq: 10 +data: + type: 'toy' +model: + type: 'lr' \ No newline at end of file