Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor fix and doc for cross backend fl #387

Merged
merged 1 commit into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion federatedscope/core/auxiliaries/trainer_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ def get_trainer(model=None,
only_for_eval=only_for_eval,
monitor=monitor)
elif config.backend == 'tensorflow':
from federatedscope.core.trainers import GeneralTFTrainer
from federatedscope.core.trainers.tf_trainer import \
GeneralTFTrainer
trainer = GeneralTFTrainer(model=model,
data=data,
device=device,
Expand Down
2 changes: 1 addition & 1 deletion federatedscope/core/proto/gRPC_comm_manager_pb2_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Client and server classes corresponding to protobuf-defined services."""
import grpc

import federatedscope.core.proto.gRPC_comm_manager_pb2 \
from federatedscope.core.proto import gRPC_comm_manager_pb2 \
as gRPC__comm__manager__pb2


Expand Down
11 changes: 9 additions & 2 deletions federatedscope/core/workers/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,15 @@ def __init__(self,
else:
self.comp_speed = None
self.comm_bandwidth = None
self.model_size = sys.getsizeof(pickle.dumps(
self.model)) / 1024.0 * 8. # kbits

if self._cfg.backend == 'torch':
self.model_size = sys.getsizeof(pickle.dumps(
self.model)) / 1024.0 * 8. # kbits
else:
# TODO: calculate model size for TF Model
self.model_size = 1.0
logger.warning(f'The calculation of model size in backend:'
f'{self._cfg.backend} is not provided.')

# Initialize communication manager
self.server_id = server_id
Expand Down
11 changes: 9 additions & 2 deletions federatedscope/core/workers/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,8 +764,15 @@ def trigger_for_start(self):
for client_index in np.arange(1, self.client_num + 1)
]
else:
model_size = sys.getsizeof(pickle.dumps(
self.model)) / 1024.0 * 8.
if self._cfg.backend == 'torch':
model_size = sys.getsizeof(pickle.dumps(
self.model)) / 1024.0 * 8.
else:
# TODO: calculate model size for TF Model
model_size = 1.0
logger.warning(f'The calculation of model size in backend:'
f'{self._cfg.backend} is not provided.')

client_resource = [
model_size / float(x['communication']) +
float(x['computation']) / 1000.
Expand Down
30 changes: 30 additions & 0 deletions federatedscope/cross_backends/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
## Cross-Backend Federated Learning

We provide an example for constructing cross-backend (TensorFlow and PyTorch) federated learning, which trains an LR model on the synthetic toy data.

The server runs with TensorFlow, and clients run with PyTorch (the suggested version of TensorFlow is 1.12.0):
```shell script
# Generate toy data
python ../../scripts/distributed_scripts/gen_data.py
# Server
python ../main.py --cfg distributed_tf_server.yaml

# Clients
python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml
python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml
python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_3.yaml
```

One of the clients runs with TensorFlow, and the server and the other clients run with PyTorch:
```shell script
# Generate toy data
python ../../scripts/distributed_scripts/gen_data.py
# Server
python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_server.yaml

# Clients with PyTorch
python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_1.yaml
python ../main.py --cfg ../../scripts/distributed_scripts/distributed_configs/distributed_client_2.yaml
# Clients with TensorFlow
python ../main.py --cfg distributed_tf_client_3.yaml
```
4 changes: 4 additions & 0 deletions federatedscope/cross_backends/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Public API of the ``federatedscope.cross_backends`` package: re-exports the
# model and aggregator from the ``tf_*`` modules (the TensorFlow-backend
# implementations used for cross-backend federated learning).
from federatedscope.cross_backends.tf_lr import LogisticRegression
from federatedscope.cross_backends.tf_aggregator import FedAvgAggregator

__all__ = ['LogisticRegression', 'FedAvgAggregator']
24 changes: 24 additions & 0 deletions federatedscope/cross_backends/distributed_tf_client_3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Configuration for FL client #3 running on the TensorFlow backend
# (``backend: 'tensorflow'``) in distributed mode.
use_gpu: False
backend: 'tensorflow'
# Federation-level settings (3 clients, 20 rounds).
federate:
client_num: 3
mode: 'distributed'
total_round_num: 20
make_global_eval: False
online_aggr: False
# Distributed-mode networking: server address and this client's own address.
# NOTE(review): ports differ per client config (50054 here) — presumably each
# client listens on its own port; confirm against the other client configs.
distribute:
use: True
server_host: '127.0.0.1'
server_port: 50051
client_host: '127.0.0.1'
client_port: 50054
role: 'client'
data_file: 'toy_data/client_3_data'
trainer:
type: 'general'
# Evaluate every 10 rounds.
eval:
freq: 10
# Toy synthetic data with a logistic-regression model.
data:
type: 'toy'
model:
type: 'lr'
22 changes: 22 additions & 0 deletions federatedscope/cross_backends/distributed_tf_server.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Configuration for the FL server running on the TensorFlow backend
# (``backend: 'tensorflow'``) in distributed mode.
use_gpu: False
backend: 'tensorflow'
# Federation-level settings (3 clients, 20 rounds, server-side global eval).
federate:
client_num: 3
mode: 'distributed'
total_round_num: 20
make_global_eval: True
online_aggr: False
# Distributed-mode networking: address this server binds to.
distribute:
use: True
server_host: '127.0.0.1'
server_port: 50051
role: 'server'
data_file: 'toy_data/server_data'
trainer:
type: 'general'
# Evaluate every 10 rounds.
eval:
freq: 10
# Toy synthetic data with a logistic-regression model.
data:
type: 'toy'
model:
type: 'lr'