-
Notifications
You must be signed in to change notification settings - Fork 811
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #838 from jfbercher/nbTranslate
nbTranslate
- Loading branch information
Showing
12 changed files
with
1,687 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
255 changes: 255 additions & 0 deletions
255
src/jupyter_contrib_nbextensions/nbconvert_support/nbTranslate.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,255 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
"""nbTranslate Preprocessor Exporter class""" | ||
|
||
# ----------------------------------------------------------------------------- | ||
# Copyright (c) 2016, J.-F. Bercher | ||
# | ||
# Distributed under the terms of the Modified BSD License. | ||
# | ||
# ----------------------------------------------------------------------------- | ||
|
||
# ----------------------------------------------------------------------------- | ||
# Imports | ||
# ----------------------------------------------------------------------------- | ||
|
||
from __future__ import print_function | ||
|
||
# Stdlib imports | ||
import os | ||
import re | ||
|
||
# IPython imports | ||
from IPython.display import HTML, display | ||
from nbconvert.preprocessors import Preprocessor | ||
from traitlets import Bool, Dict, Any, Unicode, Enum, Int, CaselessStrEnum | ||
from traitlets.config import Config | ||
|
||
from nbconvert.exporters.exporter import Exporter | ||
from nbconvert.exporters.notebook import NotebookExporter | ||
import nbformat | ||
|
||
|
||
# ----------------------------------------------------------------------------- | ||
# Preprocessor | ||
# ----------------------------------------------------------------------------- | ||
|
||
# Language abbreviations accepted by the exporter, mapped to their display
# names. The keys are the values expected in a cell's ``lang`` metadata and in
# ``NotebookLangExporter.language``.
langs = {
    'auto': 'Automatic',
    'af': 'Afrikaans',
    'sq': 'Albanian',
    'ar': 'Arabic',
    'hy': 'Armenian',
    'az': 'Azerbaijani',
    'eu': 'Basque',
    'be': 'Belarusian',
    'bn': 'Bengali',
    'bs': 'Bosnian',
    'bg': 'Bulgarian',
    'ca': 'Catalan',
    'ceb': 'Cebuano',
    'ny': 'Chichewa',
    'zh-cn': 'Chinese Simplified',
    'zh-tw': 'Chinese Traditional',
    'co': 'Corsican',
    'hr': 'Croatian',
    'cs': 'Czech',
    'da': 'Danish',
    'nl': 'Dutch',
    'en': 'English',
    'eo': 'Esperanto',
    'et': 'Estonian',
    'tl': 'Filipino',
    'fi': 'Finnish',
    'fr': 'French',
    'fy': 'Frisian',
    'gl': 'Galician',
    'ka': 'Georgian',
    'de': 'German',
    'el': 'Greek',
    'gu': 'Gujarati',
    'ht': 'Haitian Creole',
    'ha': 'Hausa',
    'haw': 'Hawaiian',
    'iw': 'Hebrew',
    'hi': 'Hindi',
    'hmn': 'Hmong',
    'hu': 'Hungarian',
    'is': 'Icelandic',
    'ig': 'Igbo',
    'id': 'Indonesian',
    'ga': 'Irish',
    'it': 'Italian',
    'ja': 'Japanese',
    'jw': 'Javanese',
    'kn': 'Kannada',
    'kk': 'Kazakh',
    'km': 'Khmer',
    'ko': 'Korean',
    'ku': 'Kurdish (Kurmanji)',
    'ky': 'Kyrgyz',
    'lo': 'Lao',
    'la': 'Latin',
    'lv': 'Latvian',
    'lt': 'Lithuanian',
    'lb': 'Luxembourgish',
    'mk': 'Macedonian',
    'mg': 'Malagasy',
    'ms': 'Malay',
    'ml': 'Malayalam',
    'mt': 'Maltese',
    'mi': 'Maori',
    'mr': 'Marathi',
    'mn': 'Mongolian',
    'my': 'Myanmar (Burmese)',
    'ne': 'Nepali',
    'no': 'Norwegian',
    'ps': 'Pashto',
    'fa': 'Persian',
    'pl': 'Polish',
    'pt': 'Portuguese',
    # 'ma' is not the ISO 639-1 code for Punjabi; it is kept so that
    # notebooks already tagged with it keep working. 'pa' is the standard code.
    'ma': 'Punjabi',
    'pa': 'Punjabi',
    'ro': 'Romanian',
    'ru': 'Russian',
    'sm': 'Samoan',
    'gd': 'Scots Gaelic',
    'sr': 'Serbian',
    'st': 'Sesotho',
    'sn': 'Shona',
    'sd': 'Sindhi',
    'si': 'Sinhala',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'so': 'Somali',
    'es': 'Spanish',
    'su': 'Sundanese',
    'sw': 'Swahili',
    'sv': 'Swedish',
    'tg': 'Tajik',
    'ta': 'Tamil',
    'te': 'Telugu',
    'th': 'Thai',
    'tr': 'Turkish',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'uz': 'Uzbek',
    'vi': 'Vietnamese',
    'cy': 'Welsh',
    'xh': 'Xhosa',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'zu': 'Zulu',
}
|
||
class nbTranslatePreprocessor(Preprocessor):
    """Keep only the markdown cells written in the selected language.

    A markdown cell may carry a ``lang`` entry in its metadata. Markdown cells
    whose ``lang`` differs from the selected language are removed from the
    notebook. Markdown cells without a ``lang`` entry, and every non-markdown
    cell, are kept.
    """

    def __init__(self, lang='en', **kw):
        """Create the preprocessor.

        Parameters
        ----------
        lang : str
            Language abbreviation of the markdown cells to keep.
        **kw
            Forwarded to the base-class initializer.
        """
        # Initialize the base class so that keyword arguments are honored
        # instead of being silently dropped.
        super(nbTranslatePreprocessor, self).__init__(**kw)
        self.language = lang

    def __call__(self, nb, resources, lang='en'):
        """Apply the preprocessor if it is enabled.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted
        resources : dictionary
            Additional resources used in the conversion process.
        lang : str
            Accepted for compatibility but not used; the language given to
            the constructor is the one applied.

        Returns
        -------
        The ``(nb, resources)`` pair, untouched when the preprocessor is
        disabled, otherwise the result of ``preprocess``.
        """
        if not self.enabled:
            return nb, resources

        self.log.debug("Applying preprocessor: %s", self.__class__.__name__)
        return self.preprocess(nb, resources)

    def preprocess(self, nb, resources):
        """Drop the markdown cells that are not in the selected language.

        ``nb.cells`` is replaced by the filtered list (the notebook passed in
        is modified), then processing is delegated to the base-class
        ``preprocess``.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted
        resources : dictionary
            Additional resources used in the conversion process. Allows
            preprocessors to pass variables into the Jinja engine.

        Returns
        -------
        Whatever the base-class ``preprocess`` returns for the filtered
        notebook and ``resources``.
        """
        language = self.language
        # A markdown cell without a 'lang' entry defaults to the selected
        # language, so untagged markdown cells are always kept.
        nb.cells = [
            cell for cell in nb.cells
            if cell.cell_type != 'markdown'
            or cell.get('metadata', {}).get('lang', language) == language
        ]
        return super(nbTranslatePreprocessor, self).preprocess(nb, resources)

    def preprocess_cell(self, cell, resources, index):
        """Return the cell unchanged; the filtering happens in ``preprocess``.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process. Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed
        """
        return cell, resources
|
||
# ---------------------------------------------------------------- | ||
# Exporter | ||
# ---------------------------------------------------------------- | ||
|
||
class NotebookLangExporter(NotebookExporter):
    """Export a notebook that keeps the markdown cells of a single language.

    The language is selected with the ``language`` option. When ``addSuffix``
    is true, ``'_' + language`` is stored as the ``output_suffix`` resource.
    """

    nbformat_version = Enum(list(nbformat.versions),
                            default_value=nbformat.current_nbformat,
                            config=True,
                            help="The nbformat version to write.\n"
                                 "Use this to downgrade notebooks.\n"
                            )

    # Abbreviation of the language to extract; checked against ``langs`` in
    # from_notebook_node.
    language = Unicode('en', shortname="rh",
                       help="Selected language").tag(config=True)

    # NOTE(review): unlike ``language`` this trait is not tagged
    # ``config=True`` -- confirm whether it is meant to be configurable.
    addSuffix = Bool(True, help="Use language tag as suffix")

    def _file_extension_default(self):
        """Return the default file extension of the exported notebook."""
        return '.ipynb'

    output_mimetype = 'application/json'

    def from_notebook_node(self, nb, resources=None, **kw):
        """Filter the notebook by language, then export it.

        Parameters
        ----------
        nb : NotebookNode
            Notebook to export. It is not modified: the filtering is applied
            to a copy.
        resources : dictionary, optional
            Additional resources used in the conversion process.
        **kw
            Forwarded to the parent ``from_notebook_node``.

        Returns
        -------
        The ``(output, resources)`` pair produced by the parent exporter,
        with ``resources['output_suffix']`` set when ``addSuffix`` is true.

        Raises
        ------
        ValueError
            If ``language`` is not one of the abbreviations in ``langs``.
        """
        import copy  # local import: `copy` is not among the module imports

        if self.language not in langs:
            raise ValueError(
                "Error -- {} is not a valid language abbreviation\n"
                "Please select one of the abbreviations in the list\n {}".format(
                    self.language, langs))

        # The preprocessor replaces ``nb.cells``; work on a copy so that the
        # caller's notebook keeps all of its cells.
        nb = copy.deepcopy(nb)

        # The order of the next three statements is kept exactly as it was.
        # NOTE(review): presumably register_preprocessor(..., enabled=True) is
        # what switches the preprocessor on, and _init_preprocessors()
        # rebuilds the exporter's own preprocessor list -- confirm against
        # nbconvert before reordering or removing any of them.
        nbtranslatepreprocessor = nbTranslatePreprocessor(lang=self.language)
        self.register_preprocessor(nbtranslatepreprocessor, enabled=True)
        self._init_preprocessors()
        nb, resources = nbtranslatepreprocessor(nb, resources)

        nb_copy, resources = super(NotebookLangExporter, self).from_notebook_node(
            nb, resources, **kw)
        if self.addSuffix:
            resources['output_suffix'] = '_' + self.language

        return nb_copy, resources
|
||
|
||
|
56 changes: 56 additions & 0 deletions
56
src/jupyter_contrib_nbextensions/nbextensions/nbTranslate/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# nbTranslate -- helps translating notebooks -- provides multilanguage support | ||
|
||
This extension | ||
|
||
- helps convert markdown cells in a notebook from one language to another (optionally using **Google translate**), | ||
- enables selectively displaying cells from a given language in a multilanguage notebook. | ||
|
||
Basically, the extension allows copying the original cell into a new one for editing and translating. Optionally, the cell source text can be passed through `google translate` and the result inserted in the new cell. Basic markdown structures (e.g. bold, emphasis, lists) are preserved/restored after conversion, to the best extent possible, but this is not perfect, and one usually has to correct the text and structures afterward. Similarly, equations are extracted before conversion and restored in the result. It seems that when translating from lang1 to lang2, the best results are obtained by taking English as an intermediate language. | ||
|
||
Metadata indicating the language used is added to each cell. This allows selectively displaying cells for a particular language and hiding the other ones. Code cells, for their part, are preserved. This way, one can get a kind of multilanguage notebook. A menu is provided to select the languages to display in the notebook. | ||
|
||
|
||
![](demo1.gif) | ||
|
||
![](demo2.gif) | ||
|
||
|
||
## Compatibility | ||
|
||
The extension has been written to play nicely with | ||
- [latex_envs]: LaTeX environments are protected before conversion and restored after. For environments with a text content, e.g. theorem, remark, etc, the content is still translated. Some minor updates have been applied to `latex_envs` to ensure the best compatibility; so update if necessary via | ||
``` | ||
pip install jupyter_latex_envs --upgrade [--user|sys-prefix] | ||
jupyter nbextension install --py latex_envs --user | ||
jupyter nbextension enable latex_envs --user --py | ||
``` | ||
- [toc2]: cells of non displayed languages are hidden and unrendered so that the toc corresponds only to the selected languages; The toc is automatically updated each time a language is added/removed. | ||
|
||
## Configuration | ||
- Parameter values can be changed using the `nbextensions-configurator`: it is possible to choose the initial source and target languages, to choose whether or not to use the google translate engine, to specify the initially displayed languages and the position of the language selection menu, and to define a keyboard shortcut. | ||
- A *configuration toolbar* is provided which enables changing the main options per notebook. In the configuration toolbar, one can toggle the use of the google translate engine, select the source and target languages, and finally select the language to display. | ||
|
||
## Export | ||
It is possible to extract one language from the multilanguage notebook. An exporter with an entry-point `selectLanguage` is provided that `converts` the notebook into another one as follows | ||
``` | ||
jupyter nbconvert --to selectLanguage --NotebookLangExporter.language=lang FILE.ipynb | ||
``` | ||
where lang is a valid language abbreviation, e.g. en, fr, ar, es, ... See the full list <a href='languages.js'> here.</a> | ||
|
||
|
||
Installation | ||
------------ | ||
|
||
If you use [jupyter-contrib-nbextensions](https://github.com/ipython-contrib/jupyter_contrib_nbextensions), proceed as usual. | ||
|
||
Otherwise, you can still install/try the extension from my personal repo, using | ||
``` | ||
jupyter nbextension install https://rawgit.com/jfbercher/jupyter_nbTranslate/master/nbTranslate.zip --user | ||
jupyter nbextension enable nbTranslate/main | ||
``` | ||
[Note that for now, installing from this repo does not install the python module, nor does it add the entry points for exporting as described above] | ||
|
||
To remove | ||
``` | ||
jupyter nbextension uninstall nbTranslate/main | ||
``` |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.