Skip to content
This repository has been archived by the owner on Dec 11, 2024. It is now read-only.

Commit

Permalink
Igbh dataset download script improvement (mlcommons#632)
Browse files Browse the repository at this point in the history
* changes for igbh dataset download

* changed logic precedence

* handle the case where `env` is not present in the dependency
  • Loading branch information
anandhu-eng authored Nov 29, 2024
1 parent 6cf933e commit 69d670c
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 41 deletions.
4 changes: 3 additions & 1 deletion automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -3605,7 +3605,9 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
"update_tags_from_env_with_prefix", {})
for t in update_tags_from_env_with_prefix:
for key in update_tags_from_env_with_prefix[t]:
if str(env.get(key, '')).strip() != '':
if str(d.get('env', {}).get(key, '')).strip() != '':
d['tags'] += "," + t + str(d.get('env')[key])
elif str(env.get(key, '')).strip() != '':
d['tags'] += "," + t + str(env[key])

for key in clean_env_keys_deps:
Expand Down
100 changes: 60 additions & 40 deletions script/get-dataset-mlperf-inference-igbh/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper,node_feat
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -46,10 +47,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper,node_label_19
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -61,10 +63,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper,node_label_2K
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -76,10 +79,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -92,10 +96,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper_cites_paper,edge_index
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -108,10 +113,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,author,author_id_index_mapping
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -123,10 +129,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,author,node_feat
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -139,10 +146,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -154,10 +162,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,conference,node_feat
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -170,10 +179,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -185,10 +195,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,institute,node_feat
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -201,10 +212,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -216,10 +228,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,journal,node_feat
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -232,10 +245,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -247,10 +261,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,fos,node_feat
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -263,10 +278,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -279,10 +295,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper_published_journal,edge_index
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -295,10 +312,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -311,10 +329,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand All @@ -327,10 +346,11 @@ prehook_deps:
extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index
force_cache: true
enable_if_env:
CM_IGBH_DATASET_TYPE: full
CM_IGBH_DATASET_TYPE:
- 'full'
names:
- dae
tags: download-and-extract
tags: download-and-extract,_wget
update_tags_from_env_with_prefix:
_url.:
- CM_PACKAGE_URL
Expand Down

0 comments on commit 69d670c

Please sign in to comment.