From d348c17922dc10b38c22854e90a75217e07043a8 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Thu, 11 Aug 2022 17:27:37 +0800
Subject: [PATCH 1/3] add dp on gcn

---
 benchmark/FedHPOB/scripts/gcn/cora_dp.yaml    | 41 +++++++++++++++++++
 .../FedHPOB/scripts/gcn/run_hpo_cora_dp.sh    | 34 +++++++++++++++
 federatedscope/core/trainers/trainer_nbafl.py |  5 ++-
 3 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
 create mode 100644 benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh

diff --git a/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml b/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
new file mode 100644
index 000000000..5d220925b
--- /dev/null
+++ b/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
@@ -0,0 +1,41 @@
+use_gpu: True
+device: 0
+early_stop:
+  patience: 100
+seed: 12345
+federate:
+  mode: standalone
+  make_global_eval: True
+  client_num: 5
+  total_round_num: 500
+  share_local_model: True
+  online_aggr: True
+data:
+  root: data/
+  type: cora
+  splitter: 'louvain'
+  batch_size: 1
+model:
+  type: gcn
+  hidden: 64
+  dropout: 0.5
+  out_channels: 7
+criterion:
+  type: CrossEntropyLoss
+train:
+  local_update_steps: 1
+  optimizer:
+    lr: 0.25
+    weight_decay: 0.0005
+trainer:
+  type: nodefullbatch_trainer
+eval:
+  freq: 1
+  metrics: ['acc', 'correct', 'f1']
+  split: ['test', 'val', 'train']
+nbafl:
+  use: True
+  mu: 0.0
+  w_clip: 0.1
+  epsilon: 2000
+  constant: 1
\ No newline at end of file
diff --git a/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh b/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh
new file mode 100644
index 000000000..5d8ec3de5
--- /dev/null
+++ b/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh
@@ -0,0 +1,34 @@
+set -e
+
+cudaid=$1
+sample_num=$2
+eps=$3
+
+# eps = (5000, 500, 50)
+
+cd ../../../..
+
+dataset=cora
+
+out_dir=out_${dataset}_dp${eps}
+
+echo "HPO starts..."
+
+lrs=(0.01 0.01668 0.02783 0.04642 0.07743 0.12915 0.21544 0.35938 0.59948 1.0)
+wds=(0.0 0.001 0.01 0.1)
+dps=(0.0 0.5)
+steps=(1 2 3 4 5 6 7 8)
+
+for ((l = 0; l < ${#lrs[@]}; l++)); do
+  for ((w = 0; w < ${#wds[@]}; w++)); do
+    for ((d = 0; d < ${#dps[@]}; d++)); do
+      for ((s = 0; s < ${#steps[@]}; s++)); do
+        for k in {1..3}; do
+          python federatedscope/main.py --cfg benchmark/FedHPOB/scripts/gcn/cora_dp.yaml device $cudaid train.optimizer.lr ${lrs[$l]} train.optimizer.weight_decay ${wds[$w]} model.dropout ${dps[$d]} train.local_update_steps ${steps[$s]} federate.sample_client_num $sample_num seed $k outdir ${out_dir}/${sample_num} expname lr${lrs[$l]}_wd${wds[$w]}_dropout${dps[$d]}_step${steps[$s]}_seed${k} >/dev/null 2>&1
+        done
+      done
+    done
+  done
+done
+
+echo "HPO ends."
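The sweep above is an exhaustive grid: 10 learning rates x 4 weight decays x 2 dropout rates x 8 local-update steps x 3 seeds = 1,920 runs for each sampled-client count. A minimal Python sketch of the same loop nest, included only to make the grid size explicit; the loop body stands in for the `python federatedscope/main.py --cfg ...` call the script issues, not actual FederatedScope API:

    import itertools

    lrs = [0.01, 0.01668, 0.02783, 0.04642, 0.07743,
           0.12915, 0.21544, 0.35938, 0.59948, 1.0]
    wds = [0.0, 0.001, 0.01, 0.1]
    dps = [0.0, 0.5]
    steps = [1, 2, 3, 4, 5, 6, 7, 8]
    seeds = [1, 2, 3]

    # Full Cartesian product, exactly as the nested shell loops enumerate it.
    grid = list(itertools.product(lrs, wds, dps, steps, seeds))
    assert len(grid) == 10 * 4 * 2 * 8 * 3  # 1920 runs per sample_num

    for lr, wd, dropout, step, seed in grid:
        pass  # stands in for one federatedscope/main.py invocation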
diff --git a/federatedscope/core/trainers/trainer_nbafl.py b/federatedscope/core/trainers/trainer_nbafl.py
index ea62002a0..c1cf9c5b3 100644
--- a/federatedscope/core/trainers/trainer_nbafl.py
+++ b/federatedscope/core/trainers/trainer_nbafl.py
@@ -109,8 +109,9 @@ def inject_noise_in_broadcast(cfg, sample_client_num, model):
 
     # Clip weight
     for p in model.parameters():
-        p.data = p.data / torch.max(torch.ones(size=p.shape),
-                                    torch.abs(p.data) / cfg.nbafl.w_clip)
+        p.data = p.data / torch.max(
+            torch.ones(size=p.shape).to(p.data.device),
+            torch.abs(p.data) / cfg.nbafl.w_clip)
 
     if len(sample_client_num) > 0:
         # Inject noise

From 7d1d45b4afa3955cf7be711c327190ba7c8ebc55 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Thu, 11 Aug 2022 18:00:10 +0800
Subject: [PATCH 2/3] update device

---
 federatedscope/core/trainers/trainer_nbafl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/federatedscope/core/trainers/trainer_nbafl.py b/federatedscope/core/trainers/trainer_nbafl.py
index c1cf9c5b3..d96952183 100644
--- a/federatedscope/core/trainers/trainer_nbafl.py
+++ b/federatedscope/core/trainers/trainer_nbafl.py
@@ -110,7 +110,7 @@ def inject_noise_in_broadcast(cfg, sample_client_num, model):
     # Clip weight
     for p in model.parameters():
         p.data = p.data / torch.max(
-            torch.ones(size=p.shape).to(p.data.device),
+            torch.ones(size=p.shape, device=p.data.device),
             torch.abs(p.data) / cfg.nbafl.w_clip)
 
     if len(sample_client_num) > 0:

From 423c9a4458ce5e7ed4a988232e8714827611d546 Mon Sep 17 00:00:00 2001
From: rayrayraykk <18007356109@163.com>
Date: Tue, 16 Aug 2022 15:49:26 +0800
Subject: [PATCH 3/3] enable gnn with nbafl

---
 benchmark/FedHPOB/scripts/gcn/citeseer.yaml               | 1 +
 benchmark/FedHPOB/scripts/gcn/cora.yaml                   | 1 +
 benchmark/FedHPOB/scripts/gcn/cora_dp.yaml                | 6 +++---
 benchmark/FedHPOB/scripts/gcn/pubmed.yaml                 | 1 +
 benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh          | 2 +-
 federatedscope/core/trainers/trainer_nbafl.py             | 9 ++++++---
 federatedscope/core/workers/client.py                     | 3 +++
 .../gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml     | 1 +
 .../gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml  | 1 +
 federatedscope/gfl/baseline/fedavg_on_cSBM.yaml           | 1 +
 .../gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml    | 1 +
 federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml      | 1 +
 .../gfl/baseline/local_gnn_node_fullbatch_citation.yaml   | 1 +
 13 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/benchmark/FedHPOB/scripts/gcn/citeseer.yaml b/benchmark/FedHPOB/scripts/gcn/citeseer.yaml
index 51da2333d..9f12b9b62 100644
--- a/benchmark/FedHPOB/scripts/gcn/citeseer.yaml
+++ b/benchmark/FedHPOB/scripts/gcn/citeseer.yaml
@@ -21,6 +21,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 6
+  task: node
 optimizer:
   lr: 0.25
   weight_decay: 0.0005
diff --git a/benchmark/FedHPOB/scripts/gcn/cora.yaml b/benchmark/FedHPOB/scripts/gcn/cora.yaml
index e1d4e79b7..854fa3e79 100644
--- a/benchmark/FedHPOB/scripts/gcn/cora.yaml
+++ b/benchmark/FedHPOB/scripts/gcn/cora.yaml
@@ -21,6 +21,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 7
+  task: node
 optimizer:
   lr: 0.25
   weight_decay: 0.0005
diff --git a/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml b/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
index 5d220925b..e8d8ac973 100644
--- a/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
+++ b/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
@@ -8,8 +8,7 @@ federate:
   make_global_eval: True
   client_num: 5
   total_round_num: 500
-  share_local_model: True
-  online_aggr: True
+  join_in_info: ['num_sample']
 data:
   root: data/
   type: cora
@@ -20,6 +19,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 7
+  task: node
 criterion:
   type: CrossEntropyLoss
 train:
@@ -37,5 +37,5 @@ nbafl:
   use: True
   mu: 0.0
   w_clip: 0.1
-  epsilon: 2000
+  epsilon: 20
   constant: 1
\ No newline at end of file
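For readers following the trainer_nbafl.py line that patches 1 and 2 iterate on: the clipping divides every parameter element by max(1, |p|/w_clip), so entries already within the bound pass through unchanged, and patch 2 builds the ones tensor directly on the parameter's device instead of allocating on CPU and copying. A self-contained sketch of the same operation (`clip_weights_` is a hypothetical helper, not the FederatedScope API; `torch.ones_like` is used here because it inherits device and dtype in one step):

    import torch

    def clip_weights_(model: torch.nn.Module, w_clip: float) -> None:
        """NbAFL-style clipping: scale each element so that |p| <= w_clip."""
        for p in model.parameters():
            # Divisor is 1 where |p| <= w_clip and |p| / w_clip elsewhere.
            # ones_like allocates on p's device directly, the same concern
            # torch.ones(size=p.shape, device=...) addresses in patch 2.
            p.data = p.data / torch.max(torch.ones_like(p.data),
                                        torch.abs(p.data) / w_clip)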
diff --git a/benchmark/FedHPOB/scripts/gcn/pubmed.yaml b/benchmark/FedHPOB/scripts/gcn/pubmed.yaml
index a8b5d051b..4eb7a509f 100644
--- a/benchmark/FedHPOB/scripts/gcn/pubmed.yaml
+++ b/benchmark/FedHPOB/scripts/gcn/pubmed.yaml
@@ -21,6 +21,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 3
+  task: node
 optimizer:
   lr: 0.25
   weight_decay: 0.0005
diff --git a/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh b/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh
index 5d8ec3de5..18ba8086b 100644
--- a/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh
+++ b/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh
@@ -4,7 +4,7 @@ cudaid=$1
 sample_num=$2
 eps=$3
 
-# eps = (5000, 500, 50)
+# eps = (1, 10, 20)
 
 cd ../../../..
 
diff --git a/federatedscope/core/trainers/trainer_nbafl.py b/federatedscope/core/trainers/trainer_nbafl.py
index d96952183..53959b2fe 100644
--- a/federatedscope/core/trainers/trainer_nbafl.py
+++ b/federatedscope/core/trainers/trainer_nbafl.py
@@ -63,8 +63,12 @@ def init_nbafl_ctx(base_trainer):
     ctx.regularizer = get_regularizer(cfg.regularizer.type)
 
     # set noise scale during upload
+    if cfg.trainer.type == 'nodefullbatch_trainer':
+        num_train_data = sum(ctx.train_loader.dataset[0]['train_mask'])
+    else:
+        num_train_data = ctx.num_train_data
     ctx.nbafl_scale_u = cfg.nbafl.w_clip * cfg.federate.total_round_num * \
-        cfg.nbafl.constant / ctx.num_train_data / \
+        cfg.nbafl.constant / num_train_data / \
         cfg.nbafl.epsilon
 
 
@@ -112,15 +116,14 @@ def inject_noise_in_broadcast(cfg, sample_client_num, model):
         p.data = p.data / torch.max(
             torch.ones(size=p.shape, device=p.data.device),
             torch.abs(p.data) / cfg.nbafl.w_clip)
-
     if len(sample_client_num) > 0:
         # Inject noise
         L = cfg.federate.sample_client_num if cfg.federate.sample_client_num\
             > 0 else cfg.federate.client_num
         scale_d = 2 * cfg.nbafl.w_clip * cfg.nbafl.constant * np.sqrt(
             np.power(cfg.federate.total_round_num, 2) - np.power(L, 2) *
             cfg.federate.client_num) / (
-                min(sample_client_num.values()) * cfg.federate.client_num *
+                min(sample_client_num) * cfg.federate.client_num *
                 cfg.nbafl.epsilon)
         for p in model.parameters():
             p.data += get_random("Normal", p.shape, {
diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py
index 86a399a3a..2dbc4bdd7 100644
--- a/federatedscope/core/workers/client.py
+++ b/federatedscope/core/workers/client.py
@@ -412,6 +412,9 @@ def callback_funcs_for_join_in_info(self, message: Message):
                     num_sample = self._cfg.train.local_update_steps * \
                         self.trainer.ctx.num_train_batch
                     join_in_info['num_sample'] = num_sample
+                    if self._cfg.trainer.type == 'nodefullbatch_trainer':
+                        join_in_info['num_sample'] = \
+                            self.trainer.ctx.data.x.shape[0]
                 elif requirement.lower() == 'client_resource':
                     assert self.comm_bandwidth is not None and self.comp_speed \
                         is not None, "The requirement join_in_info " \
diff --git a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml
index d3073ea4a..09285bf90 100644
--- a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml
+++ b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml
@@ -16,6 +16,7 @@ model:
   type: gcn
   hidden: 1024
   out_channels: 4
+  task: node
 train:
   optimizer:
     lr: 0.05
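The trainer changes above wire the NbAFL noise scales to the graph trainers: the upload scale is computed from the client's own training-set size (hence counting `train_mask` entries for the node-level trainer, and reporting the node count as `num_sample` at join-in), while the server-side download scale uses the smallest reported `num_sample`. A hedged sketch of both formulas with plain-number arguments standing in for the cfg/ctx fields (names here are illustrative, not FederatedScope API):

    import numpy as np

    def nbafl_noise_scales(w_clip, constant, total_rounds, epsilon,
                           num_train_data, per_client_samples, client_num,
                           sample_client_num=0):
        """Upload/download noise std-devs as computed in trainer_nbafl.py."""
        # Upload noise, set per client from its own training-set size.
        scale_u = w_clip * total_rounds * constant / (num_train_data * epsilon)

        # L = clients sampled per round, defaulting to all clients.
        L = sample_client_num if sample_client_num > 0 else client_num

        # Download noise; assumes total_rounds**2 > L**2 * client_num,
        # i.e. enough rounds that server-side noise is needed at all.
        scale_d = 2 * w_clip * constant * np.sqrt(
            total_rounds**2 - L**2 * client_num) / (
                min(per_client_samples) * client_num * epsilon)
        return scale_u, scale_d

With cora_dp.yaml's values (w_clip 0.1, constant 1, total_round_num 500, epsilon 20), the upload scale works out to 0.1 * 500 * 1 / (20 * m) = 2.5 / m for a client holding m training nodes.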
diff --git a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml
index dcf8b84f3..d5bc2124c 100644
--- a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml
+++ b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml
@@ -18,6 +18,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 7
+  task: node
 train:
   local_update_steps: 4
   optimizer:
diff --git a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml
index 33cf95ee6..052b8f006 100644
--- a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml
+++ b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml
@@ -17,6 +17,7 @@ model:
   type: gpr
   hidden: 256
   out_channels: 2
+  task: node
 #personalization:
   #local_param: ['prop1']
 train:
diff --git a/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml b/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml
index 1e21255fd..840196e98 100644
--- a/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml
+++ b/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml
@@ -16,6 +16,7 @@ model:
   type: sage
   hidden: 1024
   out_channels: 4
+  task: node
 train:
   local_update_steps: 16
   optimizer:
diff --git a/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml b/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml
index 051aa40e8..1672c5c98 100644
--- a/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml
+++ b/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml
@@ -16,6 +16,7 @@ model:
   type: gpr
   hidden: 256
   out_channels: 2
+  task: node
 personalization:
   local_param: ['prop1']
 train:
diff --git a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml
index e13b7c6ab..6c960bd89 100644
--- a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml
+++ b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml
@@ -17,6 +17,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 7
+  task: node
 train:
   optimizer:
     lr: 0.05