Skip to content

Commit

Permalink
supports non-list mdata (deepmodeling#748)
Browse files Browse the repository at this point in the history
* supports non-list mdata

The mdata of a task is a list containing a single dict. The enclosing list serves no purpose and makes the format less clear, so this commit also accepts the dict directly, without the list.

Note that the old list form is still supported, so no breaking changes are made. The list is simply removed from all examples, and both the list and non-list forms are covered by the unit tests.

* fix typo
  • Loading branch information
njzjz authored and ZLI-afk committed Jul 4, 2022
1 parent 4948c81 commit 8a27df3
Show file tree
Hide file tree
Showing 9 changed files with 65 additions and 58 deletions.
26 changes: 10 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ an example of new dpgen's machine.json
```json
{
"api_version": "1.0",
"train": [
"train":
{
"command": "dp",
"machine": {
Expand All @@ -1161,9 +1161,8 @@ an example of new dpgen's machine.json
"para_deg": 3,
"source_list": ["/home/user1234/deepmd.1.2.4.env"]
}
}
],
"model_devi":[
},
"model_devi":
{
"command": "lmp",
"machine":{
Expand All @@ -1184,9 +1183,8 @@ an example of new dpgen's machine.json
"group_size": 5,
"source_list": ["/home/user1234/deepmd.1.2.4.env"]
}
}
],
"fp":[
},
"fp":
{
"command": "vasp_std",
"machine":{
Expand All @@ -1208,7 +1206,6 @@ an example of new dpgen's machine.json
"source_list": ["~/vasp.env"]
}
}
]
}
```
note1: the key "local_root" in dpgen's machine.json is always `./`
Expand All @@ -1220,7 +1217,7 @@ When switching into a new machine, you may modifying the `MACHINE`, according to
An example for `MACHINE` is:
```json
{
"train": [
"train":
{
"machine": {
"batch": "slurm",
Expand All @@ -1243,9 +1240,8 @@ An example for `MACHINE` is:
"qos": "data"
},
"command": "USERPATH/dp"
}
],
"model_devi": [
},
"model_devi":
{
"machine": {
"batch": "slurm",
Expand All @@ -1269,9 +1265,8 @@ An example for `MACHINE` is:
},
"command": "lmp_serial",
"group_size": 1
}
],
"fp": [
},
"fp":
{
"machine": {
"batch": "slurm",
Expand All @@ -1298,7 +1293,6 @@ An example for `MACHINE` is:
"command": "vasp_gpu",
"group_size": 1
}
]
}
```
The following table illustrates which keys are needed for the three types of machine: `train`, `model_devi` and `fp`. Each of them is a dict (the older form, a list of dicts, is still accepted). Each dict can be considered as an independent environment for calculation.
Expand Down
11 changes: 4 additions & 7 deletions doc/run/example-of-machine.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ In this section, we will show you how to perform train task at a local workstati
In this example, we perform the `train` task on a local workstation.

```json
"train": [
"train":
{
"command": "dp",
"machine": {
Expand All @@ -36,8 +36,7 @@ In this example, we perform the `train` task on a local workstation.
"group_size": 1,
"source_list": ["/home/user1234/deepmd.env"]
}
}
],
},
```

The "command" for the train task in the DeePMD-kit is "dp".
Expand All @@ -51,7 +50,7 @@ In the resources parameter, "number_node", "cpu_per_node", and "gpu_per_node" sp
In this example, we perform the model_devi task at a local Slurm workstation.

```json
"model_devi": [
"model_devi":
{
"command": "lmp",
"machine": {
Expand All @@ -70,7 +69,6 @@ In this example, we perform the model_devi task at a local Slurm workstation.
"source_list": ["/home/user1234/lammps.env"]
}
}
],
```

The "command" for the model_devi task in the LAMMPS is "lmp".
Expand All @@ -84,7 +82,7 @@ In the resources parameter, we specify the name of the queue to which the task i
In this example, we perform the fp task at a remote PBS cluster that can be accessed via SSH.

```json
"fp": [
"fp":
{
"command": "mpirun -n 32 vasp_std",
"machine": {
Expand All @@ -106,7 +104,6 @@ In this example, we perform the fp task at a remote PBS cluster that can be acce
"source_list": ["/home/user1234/vasp.env"]
}
}
],
```

VASP is used for the fp task and MPI is used for parallel computing, so "mpirun -n 32" is added to specify the number of parallel processes.
Expand Down
13 changes: 10 additions & 3 deletions dpgen/remote/decide_machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,18 @@ def convert_mdata(mdata, task_types=["train", "model_devi", "fp"]):
'''
for task_type in task_types:
if task_type in mdata:
for key, item in mdata[task_type][0].items():
if isinstance(mdata[task_type], dict):
task_data = mdata[task_type]
elif isinstance(mdata[task_type], (list, tuple)):
task_data = mdata[task_type][0]
else:
raise TypeError("mdata/%s should be dict or list!" % task_type)
for key, item in task_data.items():
if "comments" not in key:
mdata[task_type + "_" + key] = item
group_size = mdata[task_type][0]["resources"].get("group_size", 1)
if group_size == 1: group_size = mdata[task_type][0].get("group_size", 1)
group_size = task_data["resources"].get("group_size", 1)
if group_size == 1:
group_size = task_data.get("group_size", 1)
mdata[task_type + "_" + "group_size"] = group_size
return mdata

Expand Down
13 changes: 5 additions & 8 deletions examples/machine/DeePMD-kit-1.x/machine-ali.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"train": [
"train":
{
"machine": {
"batch": "shell",
Expand Down Expand Up @@ -34,10 +34,9 @@
},
"command": "/root/deepmd-kit/bin/dp",
"group_size": 2
}
],
},

"model_devi": [
"model_devi":
{
"machine": {
"batch": "shell",
Expand Down Expand Up @@ -71,10 +70,9 @@
},
"command": "/root/deepmd-kit/bin/lmp",
"group_size": 2
}
],
},

"fp": [
"fp":
{
"machine": {
"batch": "shell",
Expand Down Expand Up @@ -108,7 +106,6 @@
"command": "mpirun -n 16 /root/deepmd-pkg/vasp.5.4.4/bin/vasp_std",
"group_size": 1
}
]
}


Expand Down
13 changes: 5 additions & 8 deletions examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"train": [
"train":
{
"machine": {
"machine_type": "slurm",
Expand All @@ -25,9 +25,8 @@
"submit_wait_time": 60
},
"python_path": "/share/apps/deepmd/compress/bin/python3.8"
}
],
"model_devi": [
},
"model_devi":
{
"machine": {
"machine_type": "slurm",
Expand All @@ -54,9 +53,8 @@
},
"command": "lmp_mpi",
"group_size": 5
}
],
"fp": [
},
"fp":
{
"machine": {
"machine_type": "lsf",
Expand Down Expand Up @@ -87,5 +85,4 @@
"command": "cp2k.popt -i input.inp",
"group_size": 50
}
]
}
12 changes: 4 additions & 8 deletions examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"_comment" : "This is an example of DP-GEN on Slurm",
"_comment" : "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang",
"train" :[
"train" :
{
"_comment" : "Specify the installed path of DeePMD-kit",
"_comment" : "The version of DeePMD-kit should be 1.*",
Expand Down Expand Up @@ -49,10 +49,9 @@
"time_limit": "23:0:0",
"_comment": "that's all"
}
}
],
},

"model_devi": [
"model_devi":
{
"machine": {
"machine_type": "slurm",
Expand Down Expand Up @@ -81,10 +80,8 @@
"command": "lmp_serial",
"_comment" : "DP-GEN will put 5 tasks together in one submitting script.",
"group_size": 5
}
],
},
"fp":
[
{
"machine": {
"machine_type": "slurm",
Expand Down Expand Up @@ -113,5 +110,4 @@
"command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input",
"group_size": 1
}
]
}
11 changes: 5 additions & 6 deletions examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"api_version": "1.0",
"deepmd_version": "2.0.1",
"train" :[
"train" :
{
"command": "dp",
"machine": {
Expand Down Expand Up @@ -34,9 +34,9 @@
"queue_name": "GPU",
"group_size": 1
}
}],
},
"model_devi":
[{
{
"command": "lmp -i input.lammps -v restart 0",
"machine": {
"batch_type": "DpCloudServer",
Expand Down Expand Up @@ -68,9 +68,9 @@
"queue_name": "GPU",
"group_size": 5
}
}],
},
"fp":
[{
{
"command": "mpirun -n 16 vasp_std",
"machine": {
"batch_type": "DpCloudServer",
Expand Down Expand Up @@ -104,5 +104,4 @@
"source_list": ["/opt/intel/oneapi/setvars.sh"]
}
}
]
}
14 changes: 14 additions & 0 deletions tests/tools/machine_fp_single2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"fp":
{
"command": "vasp_std",
"machine":{
"batch_type": "PBS"
},
"resources": {
"group_size" : 8
},
"_comments" : "In user_forward_files, define input files to be uploaded.",
"user_forward_files" : ["vdw_kernel.bindat"]
}
}
10 changes: 8 additions & 2 deletions tests/tools/test_convert_mdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@
__package__ = 'tools'
from dpgen.remote.decide_machine import convert_mdata
from .context import setUpModule
machine_file = 'machine_fp_single.json'

class TestConvertMdata(unittest.TestCase):
machine_file = 'machine_fp_single.json'

def test_convert_mdata (self):
mdata = json.load(open(machine_file))
mdata = json.load(open(self.machine_file))
mdata = convert_mdata(mdata, ["fp"])
self.assertEqual(mdata["fp_command"], "vasp_std")
self.assertEqual(mdata["fp_group_size"], 8)
self.assertEqual(mdata["fp_machine"]["batch_type"], "PBS")
self.assertEqual(mdata["fp_user_forward_files"], ["vdw_kernel.bindat"])


class TestConvertMdata2(TestConvertMdata):
machine_file = 'machine_fp_single2.json'

0 comments on commit 8a27df3

Please sign in to comment.