diff --git a/benchmark/FedHPOB/scripts/gcn/citeseer.yaml b/benchmark/FedHPOB/scripts/gcn/citeseer.yaml
index 51da2333d..9f12b9b62 100644
--- a/benchmark/FedHPOB/scripts/gcn/citeseer.yaml
+++ b/benchmark/FedHPOB/scripts/gcn/citeseer.yaml
@@ -21,6 +21,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 6
+  task: node
 optimizer:
   lr: 0.25
   weight_decay: 0.0005
diff --git a/benchmark/FedHPOB/scripts/gcn/cora.yaml b/benchmark/FedHPOB/scripts/gcn/cora.yaml
index e1d4e79b7..854fa3e79 100644
--- a/benchmark/FedHPOB/scripts/gcn/cora.yaml
+++ b/benchmark/FedHPOB/scripts/gcn/cora.yaml
@@ -21,6 +21,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 7
+  task: node
 optimizer:
   lr: 0.25
   weight_decay: 0.0005
diff --git a/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml b/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
new file mode 100644
index 000000000..e8d8ac973
--- /dev/null
+++ b/benchmark/FedHPOB/scripts/gcn/cora_dp.yaml
@@ -0,0 +1,41 @@
+use_gpu: True
+device: 0
+early_stop:
+  patience: 100
+seed: 12345
+federate:
+  mode: standalone
+  make_global_eval: True
+  client_num: 5
+  total_round_num: 500
+  join_in_info: ['num_sample']
+data:
+  root: data/
+  type: cora
+  splitter: 'louvain'
+  batch_size: 1
+model:
+  type: gcn
+  hidden: 64
+  dropout: 0.5
+  out_channels: 7
+  task: node
+criterion:
+  type: CrossEntropyLoss
+train:
+  local_update_steps: 1
+  optimizer:
+    lr: 0.25
+    weight_decay: 0.0005
+trainer:
+  type: nodefullbatch_trainer
+eval:
+  freq: 1
+  metrics: ['acc', 'correct', 'f1']
+  split: ['test', 'val', 'train']
+nbafl:
+  use: True
+  mu: 0.0
+  w_clip: 0.1
+  epsilon: 20
+  constant: 1
\ No newline at end of file
diff --git a/benchmark/FedHPOB/scripts/gcn/pubmed.yaml b/benchmark/FedHPOB/scripts/gcn/pubmed.yaml
index a8b5d051b..4eb7a509f 100644
--- a/benchmark/FedHPOB/scripts/gcn/pubmed.yaml
+++ b/benchmark/FedHPOB/scripts/gcn/pubmed.yaml
@@ -21,6 +21,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 3
+  task: node
 optimizer:
   lr: 0.25
   weight_decay: 0.0005
diff --git a/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh b/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh
new file mode 100644
index 000000000..18ba8086b
--- /dev/null
+++ b/benchmark/FedHPOB/scripts/gcn/run_hpo_cora_dp.sh
@@ -0,0 +1,34 @@
+set -e
+
+cudaid=$1
+sample_num=$2
+eps=$3
+
+# eps = (1, 10, 20)
+
+cd ../../../..
+
+dataset=cora
+
+out_dir=out_${dataset}_dp${eps}
+
+echo "HPO starts..."
+
+lrs=(0.01 0.01668 0.02783 0.04642 0.07743 0.12915 0.21544 0.35938 0.59948 1.0)
+wds=(0.0 0.001 0.01 0.1)
+dps=(0.0 0.5)
+steps=(1 2 3 4 5 6 7 8)
+
+for ((l = 0; l < ${#lrs[@]}; l++)); do
+  for ((w = 0; w < ${#wds[@]}; w++)); do
+    for ((d = 0; d < ${#dps[@]}; d++)); do
+      for ((s = 0; s < ${#steps[@]}; s++)); do
+        for k in {1..3}; do
+          python federatedscope/main.py --cfg benchmark/FedHPOB/scripts/gcn/cora_dp.yaml device $cudaid train.optimizer.lr ${lrs[$l]} train.optimizer.weight_decay ${wds[$w]} model.dropout ${dps[$d]} train.local_update_steps ${steps[$s]} federate.sample_client_num $sample_num seed $k outdir ${out_dir}/${sample_num} expname lr${lrs[$l]}_wd${wds[$w]}_dropout${dps[$d]}_step${steps[$s]}_seed${k} >/dev/null 2>&1
+        done
+      done
+    done
+  done
+done
+
+echo "HPO ends."
diff --git a/federatedscope/core/trainers/trainer_nbafl.py b/federatedscope/core/trainers/trainer_nbafl.py
index ea62002a0..53959b2fe 100644
--- a/federatedscope/core/trainers/trainer_nbafl.py
+++ b/federatedscope/core/trainers/trainer_nbafl.py
@@ -63,8 +63,12 @@ def init_nbafl_ctx(base_trainer):
     ctx.regularizer = get_regularizer(cfg.regularizer.type)
 
     # set noise scale during upload
+    if cfg.trainer.type == 'nodefullbatch_trainer':
+        num_train_data = sum(ctx.train_loader.dataset[0]['train_mask'])
+    else:
+        num_train_data = ctx.num_train_data
     ctx.nbafl_scale_u = cfg.nbafl.w_clip * cfg.federate.total_round_num * \
-        cfg.nbafl.constant / ctx.num_train_data / \
+        cfg.nbafl.constant / num_train_data / \
         cfg.nbafl.epsilon
 
 
@@ -109,9 +113,9 @@ def inject_noise_in_broadcast(cfg, sample_client_num, model):
 
     # Clip weight
     for p in model.parameters():
-        p.data = p.data / torch.max(torch.ones(size=p.shape),
-                                    torch.abs(p.data) / cfg.nbafl.w_clip)
-
+        p.data = p.data / torch.max(
+            torch.ones(size=p.shape, device=p.data.device),
+            torch.abs(p.data) / cfg.nbafl.w_clip)
     if len(sample_client_num) > 0:
         # Inject noise
         L = cfg.federate.sample_client_num if cfg.federate.sample_client_num\
@@ -120,7 +124,7 @@
         scale_d = 2 * cfg.nbafl.w_clip * cfg.nbafl.constant * np.sqrt(
             np.power(cfg.federate.total_round_num, 2) -
             np.power(L, 2) * cfg.federate.client_num) / (
-                min(sample_client_num.values()) * cfg.federate.client_num *
+                min(sample_client_num) * cfg.federate.client_num *
                 cfg.nbafl.epsilon)
         for p in model.parameters():
             p.data += get_random("Normal", p.shape, {
diff --git a/federatedscope/core/workers/client.py b/federatedscope/core/workers/client.py
index 86a399a3a..2dbc4bdd7 100644
--- a/federatedscope/core/workers/client.py
+++ b/federatedscope/core/workers/client.py
@@ -412,6 +412,9 @@ def callback_funcs_for_join_in_info(self, message: Message):
                 num_sample = self._cfg.train.local_update_steps * \
                     self.trainer.ctx.num_train_batch
                 join_in_info['num_sample'] = num_sample
+                if self._cfg.trainer.type == 'nodefullbatch_trainer':
+                    join_in_info['num_sample'] = \
+                        self.trainer.ctx.data.x.shape[0]
             elif requirement.lower() == 'client_resource':
                 assert self.comm_bandwidth is not None and self.comp_speed \
                     is not None, "The requirement join_in_info " \
diff --git a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml
index d3073ea4a..09285bf90 100644
--- a/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml
+++ b/federatedscope/gfl/baseline/fedavg_gcn_fullbatch_on_dblpnew.yaml
@@ -16,6 +16,7 @@ model:
   type: gcn
   hidden: 1024
   out_channels: 4
+  task: node
 train:
   optimizer:
     lr: 0.05
diff --git a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml
index dcf8b84f3..d5bc2124c 100644
--- a/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml
+++ b/federatedscope/gfl/baseline/fedavg_gnn_node_fullbatch_citation.yaml
@@ -18,6 +18,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 7
+  task: node
 train:
   local_update_steps: 4
   optimizer:
diff --git a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml
index 33cf95ee6..052b8f006 100644
--- a/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml
+++ b/federatedscope/gfl/baseline/fedavg_on_cSBM.yaml
@@ -17,6 +17,7 @@ model:
   type: gpr
   hidden: 256
   out_channels: 2
+  task: node
 #personalization:
 #local_param: ['prop1']
 train:
diff --git a/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml b/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml
index 1e21255fd..840196e98 100644
--- a/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml
+++ b/federatedscope/gfl/baseline/fedavg_sage_minibatch_on_dblpnew.yaml
@@ -16,6 +16,7 @@ model:
   type: sage
   hidden: 1024
   out_channels: 4
+  task: node
 train:
   local_update_steps: 16
   optimizer:
diff --git a/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml b/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml
index 051aa40e8..1672c5c98 100644
--- a/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml
+++ b/federatedscope/gfl/baseline/fedavg_wpsn_on_cSBM.yaml
@@ -16,6 +16,7 @@ model:
   type: gpr
   hidden: 256
   out_channels: 2
+  task: node
 personalization:
   local_param: ['prop1']
 train:
diff --git a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml
index e13b7c6ab..6c960bd89 100644
--- a/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml
+++ b/federatedscope/gfl/baseline/local_gnn_node_fullbatch_citation.yaml
@@ -17,6 +17,7 @@ model:
   hidden: 64
   dropout: 0.5
   out_channels: 7
+  task: node
 train:
   optimizer:
     lr: 0.05