Skip to content

Commit

Permalink
Addressed PR feedback and changed default to preserve language info
Browse files Browse the repository at this point in the history
  • Loading branch information
MSeal committed Sep 7, 2020
1 parent 3877e3f commit 9977c33
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 13 deletions.
52 changes: 44 additions & 8 deletions nbconvert/preprocessors/clearmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,57 @@ class ClearMetadataPreprocessor(Preprocessor):
Removes all the metadata from all code cells in a notebook.
"""

# Switch consulted by preprocess_cell before filtering code-cell metadata.
clear_cell_metadata = Bool(
    True,
    help=(
        "Flag to choose if cell metadata is to be cleared "
        "in addition to notebook metadata."
    ),
).tag(config=True)
# Switch consulted by preprocess before filtering notebook-level metadata.
clear_notebook_metadata = Bool(
    True,
    help=(
        "Flag to choose if notebook metadata is to be cleared "
        "in addition to cell metadata."
    ),
).tag(config=True)
# Flat set of top-level keys to keep (no nested paths).
preserve_metadata_keys = Set(
    help=(
        "Indicates the keys to preserve when deleting metadata "
        "across both cells and notebook metadata fields."
    ),
).tag(config=True)
# Key paths kept in the notebook-level metadata. Each entry is either a
# plain key or a tuple of keys leading to a nested value; by default only
# ``language_info.name`` survives.
preserve_nb_metadata_mask = Set(
    [('language_info', 'name')],
    help=("Indicates the key paths to preserve when deleting metadata "
          "from the notebook metadata fields. Tuples of keys can be "
          "passed to preserve specific nested values"),
).tag(config=True)
# Key paths kept in each code cell's metadata; same entry format as the
# notebook mask. Empty by default, so nothing is preserved.
preserve_cell_metadata_mask = Set(
    help=("Indicates the key paths to preserve when deleting metadata "
          "from the cell metadata fields. Tuples of keys can be "
          "passed to preserve specific nested values"),
).tag(config=True)

def current_key(self, mask_key):
    """Return the top-level key addressed by a single mask entry.

    A mask entry is either a plain string key or a sequence of keys that
    describes a path into nested metadata.

    Returns the string itself, the first element of a non-empty sequence,
    or ``None`` for an empty sequence.
    """
    if isinstance(mask_key, str):
        return mask_key
    if len(mask_key) == 0:
        # Safeguard: an empty path addresses nothing.
        return None
    return mask_key[0]

def current_mask(self, mask):
    """Return the set of keys that ``mask`` preserves at the current level."""
    keys = (self.current_key(entry) for entry in mask)
    return {key for key in keys if key is not None}

def nested_masks(self, mask):
    """Map each top-level key to the mask that applies beneath it.

    Only non-string entries with more than one element contribute; the
    elements after the first form the sub-mask for that key.

    Entries that share the same first key are merged into one sub-mask.
    Without the merge, the entry visited last would silently replace the
    others, and for a set-valued mask "last" depends on iteration order.
    """
    nested = {}
    for entry in mask:
        if not entry or isinstance(entry, str) or len(entry) < 2:
            continue
        key = self.current_key(entry[0])
        remainder = entry[1:]
        if key in nested:
            # Several paths start at this key: keep all of them.
            remainder = tuple(nested[key]) + tuple(remainder)
        nested[key] = remainder
    return nested

def nested_filter(self, items, mask):
    """Yield the ``(key, value)`` pairs of ``items`` that ``mask`` preserves.

    ``items`` is an iterable of key/value pairs (e.g. ``dict.items()``).
    A key preserved by a plain entry is yielded with its value untouched.
    A key preserved through a nested path is yielded with its value
    filtered recursively by the sub-mask; if that value is not a dict the
    pair is dropped, because the requested nested keys cannot exist in it.
    When a key has both a plain entry and a nested path, the nested path
    wins.
    """
    keep_current = self.current_mask(mask)
    keep_nested_lookup = self.nested_masks(mask)
    for key, value in items:
        if key not in keep_current:
            continue
        sub_mask = keep_nested_lookup.get(key)
        if sub_mask is None:
            yield key, value
        elif isinstance(value, dict):
            yield key, dict(self.nested_filter(value.items(), sub_mask))

def preprocess_cell(self, cell, resources, cell_index):
"""
Filter the metadata of code cells, keeping only the preserved entries.

Cells of any other type are left untouched. Returns the
``(cell, resources)`` pair.
"""
# NOTE(review): the next four lines filter on ``preserve_metadata_keys`` and
# look like the pre-change body shown by the diff; the
# ``clear_cell_metadata`` branch below appears to be its replacement -- confirm.
if cell.cell_type == 'code':
# Keep only the top-level keys listed in preserve_metadata_keys
if 'metadata' in cell:
cell.metadata = { k: v for k,v in cell.metadata.items() if k in self.preserve_metadata_keys }
if self.clear_cell_metadata:
if cell.cell_type == 'code':
# Keep only the (possibly nested) entries allowed by the cell mask
if 'metadata' in cell:
cell.metadata = dict(self.nested_filter(cell.metadata.items(), self.preserve_cell_metadata_mask))
return cell, resources

def preprocess(self, nb, resources):
Expand All @@ -45,5 +81,5 @@ def preprocess(self, nb, resources):
nb, resources = super().preprocess(nb, resources)
if self.clear_notebook_metadata:
if 'metadata' in nb:
nb.metadata = { k: v for k,v in nb.metadata.items() if k in self.preserve_metadata_keys }
nb.metadata = dict(self.nested_filter(nb.metadata.items(), self.preserve_nb_metadata_mask))
return nb, resources
69 changes: 64 additions & 5 deletions nbconvert/preprocessors/tests/test_clearmetadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ class TestClearMetadata(PreprocessorTestsBase):

def build_notebook(self):
"""Build the base test notebook and attach notebook- and cell-level metadata."""
notebook = super().build_notebook()
# NOTE(review): this assignment is immediately overwritten by the next one;
# it looks like the pre-change line shown by the diff -- confirm.
notebook.metadata = {'language': 'python'}
notebook.metadata = {
'language_info': {'name': 'python', 'version': '3.6.7'},
'kernelspec': {'language': 'python', 'name': 'python3'}
}
# Add test fields to the first cell: a flat value, a nested dict, and timing info
if 'metadata' not in notebook.cells[0]:
notebook.cells[0].metadata = {}
notebook.cells[0].metadata['test_field'] = 'test_value'
notebook.cells[0].metadata['test_nested'] = { 'test_keep': 'keep', 'test_filtered': 'filter' }
notebook.cells[0].metadata['executeTime'] = dict([('end_time', '09:31:50'),
('start_time', '09:31:49')])
return notebook
Expand All @@ -41,7 +45,8 @@ def test_default_output(self):
nb, res = preprocessor(nb, res)

assert not nb.cells[0].metadata
assert not nb.metadata
# By default we only preserve the language name
assert nb.metadata == {'language_info': {'name': 'python'}}

def test_cell_only(self):
"""Test the output of the ClearMetadataPreprocessor"""
Expand All @@ -53,22 +58,76 @@ def test_cell_only(self):
assert not nb.cells[0].metadata
assert nb.metadata

def test_notebook_only(self):
    """Only notebook metadata is cleared when cell clearing is disabled."""
    notebook = self.build_notebook()
    resources = self.build_resources()
    preprocessor = self.build_preprocessor(
        clear_cell_metadata=False,
        preserve_nb_metadata_mask=set(),
    )
    notebook, resources = preprocessor(notebook, resources)

    # Cell metadata survives; notebook metadata is emptied.
    assert notebook.cells[0].metadata
    assert not notebook.metadata

def test_selective_cell_metadata(self):
"""A plain string key in the cell mask preserves that cell metadata entry."""
nb = self.build_notebook()
res = self.build_resources()
# NOTE(review): this preprocessor is replaced by the assignment right below;
# it looks like the pre-change line shown by the diff -- confirm.
preprocessor = self.build_preprocessor(preserve_metadata_keys=['test_field'])
preprocessor = self.build_preprocessor(
preserve_cell_metadata_mask=['test_field'],
preserve_nb_metadata_mask=set()
)
nb, res = preprocessor(nb, res)

# Only the masked cell key remains; notebook metadata is emptied
assert nb.cells[0].metadata == { 'test_field': 'test_value' }
assert not nb.metadata

def test_selective_cell_tuple_metadata(self):
    """A one-element tuple in the cell mask behaves like a plain string key."""
    notebook = self.build_notebook()
    resources = self.build_resources()
    # A tuple of length 1 must work just as well as the bare string key.
    cell_mask = [('test_field',)]
    preprocessor = self.build_preprocessor(
        preserve_cell_metadata_mask=cell_mask,
        preserve_nb_metadata_mask=set(),
    )
    notebook, resources = preprocessor(notebook, resources)

    assert notebook.cells[0].metadata == {'test_field': 'test_value'}
    assert not notebook.metadata

def test_nested_cell_metadata(self):
    """A two-element key path preserves only the addressed nested value."""
    notebook = self.build_notebook()
    resources = self.build_resources()
    cell_mask = [('test_nested', 'test_keep')]
    preprocessor = self.build_preprocessor(
        preserve_cell_metadata_mask=cell_mask,
        preserve_nb_metadata_mask=set(),
    )
    notebook, resources = preprocessor(notebook, resources)

    # The sibling 'test_filtered' entry must be gone.
    assert notebook.cells[0].metadata == {'test_nested': {'test_keep': 'keep'}}
    assert not notebook.metadata

def test_nested_cell_tuple_metadata(self):
    """A nested key wrapped in its own one-element tuple is still honoured."""
    notebook = self.build_notebook()
    resources = self.build_resources()
    # The nested key is itself given as a tuple of length 1 inside the path.
    cell_mask = [('test_nested', ('test_keep',))]
    preprocessor = self.build_preprocessor(
        preserve_cell_metadata_mask=cell_mask,
        preserve_nb_metadata_mask=set(),
    )
    notebook, resources = preprocessor(notebook, resources)

    assert notebook.cells[0].metadata == {'test_nested': {'test_keep': 'keep'}}
    assert not notebook.metadata

def test_selective_notebook_metadata(self):
"""A plain key in the notebook mask preserves that whole notebook metadata entry."""
nb = self.build_notebook()
res = self.build_resources()
# NOTE(review): this preprocessor is replaced by the assignment right below;
# it looks like the pre-change line shown by the diff -- confirm.
preprocessor = self.build_preprocessor(preserve_metadata_keys=['language'])
preprocessor = self.build_preprocessor(preserve_nb_metadata_mask=['kernelspec'])
nb, res = preprocessor(nb, res)

assert not nb.cells[0].metadata
# NOTE(review): the next assertion conflicts with the one after it (both
# compare nb.metadata to different values); presumably it is the pre-change
# line shown by the diff -- confirm.
assert nb.metadata == { 'language': 'python' }
assert nb.metadata == { 'kernelspec': { 'language': 'python', 'name': 'python3' } }

0 comments on commit 9977c33

Please sign in to comment.