Merge pull request #838 from jfbercher/nbTranslate

nbTranslate
ipython-contrib · Jan 13, 2017 · 6baf2f9 · 6baf2f9
2 parents e7a805b + 94fc88c
commit 6baf2f9
Show file tree

Hide file tree

Showing 12 changed files with 1,687 additions and 2 deletions.
diff --git a/setup.py b/setup.py
@@ -65,7 +65,7 @@ def main():
             'jupyter_contrib_core >=0.3',
             'jupyter_core',
             'jupyter_highlight_selected_word >=0.0.5',
-            'jupyter_latex_envs >=1.3.4',
+            'jupyter_latex_envs >=1.3.6',
             'jupyter_nbextensions_configurator',
             'nbconvert',
             'notebook >=4.0',
@@ -97,6 +97,7 @@ def main():
             ],
             'nbconvert.exporters': [
                 'html_toc = jupyter_contrib_nbextensions.nbconvert_support.toc2:TocExporter',  # noqa: E501
+                'selectLanguage = jupyter_contrib_nbextensions.nbconvert_support.nbTranslate:NotebookLangExporter',  # noqa: E501
                 'html_embed = jupyter_contrib_nbextensions.nbconvert_support.embedhtml:EmbedHTMLExporter',  # noqa: E501
             ],
         },
@@ -116,6 +117,5 @@ def main():
         ],
     )
 
-
 if __name__ == '__main__':
     main()
diff --git a/src/jupyter_contrib_nbextensions/nbconvert_support/__init__.py b/src/jupyter_contrib_nbextensions/nbconvert_support/__init__.py
@@ -9,6 +9,7 @@
 from .pre_pymarkdown import PyMarkdownPreprocessor
 from .pre_svg2pdf import SVG2PDFPreprocessor
 from .toc2 import TocExporter
+from .nbTranslate import NotebookLangExporter 
 
 __all__ = [
     'CodeFoldingPreprocessor',
@@ -20,6 +21,7 @@
     'SVG2PDFPreprocessor',
     'templates_directory',
     'TocExporter',
+    'NotebookLangExporter'
 ]
 
 

diff --git a/src/jupyter_contrib_nbextensions/nbconvert_support/nbTranslate.py b/src/jupyter_contrib_nbextensions/nbconvert_support/nbTranslate.py
@@ -0,0 +1,255 @@
+# -*- coding: utf-8 -*-
+
+"""nbTranslate Preprocessor Exporter class"""
+
+# -----------------------------------------------------------------------------
+# Copyright (c) 2016, J.-F. Bercher
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# Imports
+# -----------------------------------------------------------------------------
+
+from __future__ import print_function
+
+# Stdlib imports
+import os
+import re
+
+# IPython imports
+from IPython.display import HTML, display
+from nbconvert.preprocessors import Preprocessor
+from traitlets import Bool, Dict, Any, Unicode, Enum, Int, CaselessStrEnum
+from traitlets.config import Config
+
+from nbconvert.exporters.exporter import Exporter 
+from nbconvert.exporters.notebook import NotebookExporter
+import nbformat
+
+
+# -----------------------------------------------------------------------------
+# Preprocessor
+# -----------------------------------------------------------------------------
+
+# Language abbreviations accepted by the exporter, mapped to display names.
+# The keys are what ``NotebookLangExporter.language`` is validated against.
+# NOTE(review): 'ma' is listed for Punjabi although the ISO 639-1 code is
+# 'pa'; it is left unchanged because existing cell metadata may already
+# carry 'ma' -- confirm against the extension's languages.js.
+langs = {
+    'auto': 'Automatic',
+    'af': 'Afrikaans',
+    'sq': 'Albanian',
+    'ar': 'Arabic',
+    'hy': 'Armenian',
+    'az': 'Azerbaijani',
+    'eu': 'Basque',
+    'be': 'Belarusian',
+    'bn': 'Bengali',
+    'bs': 'Bosnian',
+    'bg': 'Bulgarian',
+    'ca': 'Catalan',
+    'ceb': 'Cebuano',
+    'ny': 'Chichewa',
+    'zh-cn': 'Chinese Simplified',
+    'zh-tw': 'Chinese Traditional',
+    'co': 'Corsican',
+    'hr': 'Croatian',
+    'cs': 'Czech',
+    'da': 'Danish',
+    'nl': 'Dutch',
+    'en': 'English',
+    'eo': 'Esperanto',
+    'et': 'Estonian',
+    'tl': 'Filipino',
+    'fi': 'Finnish',
+    'fr': 'French',
+    'fy': 'Frisian',
+    'gl': 'Galician',
+    'ka': 'Georgian',
+    'de': 'German',
+    'el': 'Greek',
+    'gu': 'Gujarati',
+    'ht': 'Haitian Creole',
+    'ha': 'Hausa',
+    'haw': 'Hawaiian',
+    'iw': 'Hebrew',
+    'hi': 'Hindi',
+    'hmn': 'Hmong',
+    'hu': 'Hungarian',
+    'is': 'Icelandic',
+    'ig': 'Igbo',
+    'id': 'Indonesian',
+    'ga': 'Irish',
+    'it': 'Italian',
+    'ja': 'Japanese',
+    'jw': 'Javanese',
+    'kn': 'Kannada',
+    'kk': 'Kazakh',
+    'km': 'Khmer',
+    'ko': 'Korean',
+    'ku': 'Kurdish (Kurmanji)',
+    'ky': 'Kyrgyz',
+    'lo': 'Lao',
+    'la': 'Latin',
+    'lv': 'Latvian',
+    'lt': 'Lithuanian',
+    'lb': 'Luxembourgish',
+    'mk': 'Macedonian',
+    'mg': 'Malagasy',
+    'ms': 'Malay',
+    'ml': 'Malayalam',
+    'mt': 'Maltese',
+    'mi': 'Maori',
+    'mr': 'Marathi',
+    'mn': 'Mongolian',
+    'my': 'Myanmar (Burmese)',
+    'ne': 'Nepali',
+    'no': 'Norwegian',
+    'ps': 'Pashto',
+    'fa': 'Persian',
+    'pl': 'Polish',
+    'pt': 'Portuguese',
+    'ma': 'Punjabi',
+    'ro': 'Romanian',
+    'ru': 'Russian',
+    'sm': 'Samoan',
+    'gd': 'Scots Gaelic',
+    'sr': 'Serbian',
+    'st': 'Sesotho',
+    'sn': 'Shona',
+    'sd': 'Sindhi',
+    'si': 'Sinhala',
+    'sk': 'Slovak',
+    'sl': 'Slovenian',
+    'so': 'Somali',
+    'es': 'Spanish',
+    'su': 'Sudanese',
+    'sw': 'Swahili',
+    'sv': 'Swedish',
+    'tg': 'Tajik',
+    'ta': 'Tamil',
+    'te': 'Telugu',
+    'th': 'Thai',
+    'tr': 'Turkish',
+    'uk': 'Ukrainian',
+    'ur': 'Urdu',
+    'uz': 'Uzbek',
+    'vi': 'Vietnamese',
+    'cy': 'Welsh',
+    'xh': 'Xhosa',
+    'yi': 'Yiddish',
+    'yo': 'Yoruba',
+    'zu': 'Zulu',
+}
+
+class nbTranslatePreprocessor(Preprocessor):
+    """Keep only the markdown cells written in the selected language.
+
+    A markdown cell is dropped when its ``lang`` metadata entry differs from
+    the selected language. Markdown cells without a ``lang`` entry, and all
+    non-markdown cells, are kept.
+    """
+
+    def __init__(self, lang='en', **kw):
+        """
+        Parameters
+        ----------
+        lang : str
+            Language abbreviation of the markdown cells to keep.
+        **kw
+            Forwarded unchanged to the ``Preprocessor`` constructor.
+        """
+        # The base initialiser used to be skipped, so every keyword argument
+        # given here was silently discarded.
+        super(nbTranslatePreprocessor, self).__init__(**kw)
+        self.language = lang
+
+    def __call__(self, nb, resources, lang='en'):
+        """
+        Apply the preprocessor if it is enabled, else return the input as is.
+
+        Parameters
+        ----------
+        nb : NotebookNode
+            Notebook being converted
+        resources : dictionary
+            Additional resources used in the conversion process.
+        lang : str
+            Unused; kept for backward compatibility. The language given to
+            the constructor is the one that applies.
+        """
+        if not self.enabled:
+            return nb, resources
+        self.log.debug("Applying preprocessor: %s",
+                       self.__class__.__name__)
+        return self.preprocess(nb, resources)
+
+    def preprocess(self, nb, resources):
+        """
+        Remove the markdown cells that are not in the selected language.
+
+        The cell list of ``nb`` is replaced in place, then the base class
+        ``preprocess`` is run on the result.
+
+        Parameters
+        ----------
+        nb : NotebookNode
+            Notebook being converted
+        resources : dictionary
+            Additional resources used in the conversion process.  Allows
+            preprocessors to pass variables into the Jinja engine.
+
+        Returns
+        -------
+        Whatever the base class ``preprocess`` returns for the filtered
+        notebook and ``resources``.
+        """
+        language = self.language
+        # A markdown cell without a 'lang' entry falls back to the selected
+        # language and is therefore always kept.
+        nb.cells = [
+            cell for cell in nb.cells
+            if cell.cell_type != 'markdown'
+            or cell.get('metadata', {}).get('lang', language) == language
+        ]
+        return super(nbTranslatePreprocessor, self).preprocess(nb, resources)
+
+    def preprocess_cell(self, cell, resources, index):
+        """
+        Return the cell unchanged; the filtering is done in ``preprocess``.
+
+        Parameters
+        ----------
+        cell : NotebookNode cell
+            Notebook cell being processed
+        resources : dictionary
+            Additional resources used in the conversion process.  Allows
+            preprocessors to pass variables into the Jinja engine.
+        index : int
+            Index of the cell being processed
+        """
+        return cell, resources
+
+# ----------------------------------------------------------------
+# Exporter
+# ----------------------------------------------------------------
+
+class NotebookLangExporter(NotebookExporter):
+    """Export a notebook that keeps the markdown cells of one language only.
+
+    The selected language is given by the ``language`` trait; the filtering
+    itself is done by ``nbTranslatePreprocessor``.
+    """
+
+    nbformat_version = Enum(list(nbformat.versions),
+        default_value=nbformat.current_nbformat,
+        config=True,
+        help="""The nbformat version to write.
+        Use this to downgrade notebooks.
+        """
+    )
+
+    # Abbreviation of the language to keep; checked against ``langs`` when
+    # the notebook is exported.
+    language = Unicode('en', shortname="rh",
+                       help="Selected language").tag(config=True)
+
+    # Tagged as configurable so that the suffix can actually be switched off
+    # from a config file or the command line.
+    addSuffix = Bool(True,
+                     help="Use language tag as suffix").tag(config=True)
+
+    def _file_extension_default(self):
+        return '.ipynb'
+
+    output_mimetype = 'application/json'
+
+    def from_notebook_node(self, nb, resources=None, **kw):
+        """
+        Filter the notebook by language, then export it as a notebook.
+
+        Parameters
+        ----------
+        nb : NotebookNode
+            Notebook to export. It is not modified.
+        resources : dictionary
+            Additional resources used in the conversion process.
+        **kw
+            Forwarded to ``NotebookExporter.from_notebook_node``.
+
+        Returns
+        -------
+        The ``(output, resources)`` pair produced by the parent exporter.
+        When ``addSuffix`` is true, ``resources['output_suffix']`` is set to
+        ``'_'`` followed by the language abbreviation.
+
+        Raises
+        ------
+        ValueError
+            If ``language`` is not one of the keys of ``langs``.
+        """
+        import copy  # local import: only needed by this method
+
+        if self.language not in langs:
+            raise ValueError(
+                "Error -- {} is not a valid language abbreviation\n"
+                "Please select one of the abbreviations in the list\n"
+                " {}".format(self.language, langs))
+
+        # The language filter replaces the cell list in place; work on a
+        # copy so that the caller's notebook keeps all of its cells.
+        nb = copy.deepcopy(nb)
+
+        nbtranslatepreprocessor = nbTranslatePreprocessor(lang=self.language)
+        # NOTE(review): registering with enabled=True is relied upon to
+        # switch the preprocessor on; _init_preprocessors() is presumably
+        # called to rebuild the exporter's own preprocessor list so that the
+        # filter only runs through the explicit call below -- confirm
+        # against the nbconvert Exporter implementation.
+        self.register_preprocessor(nbtranslatepreprocessor, enabled=True)
+        self._init_preprocessors()
+        nb, resources = nbtranslatepreprocessor(nb, resources)
+
+        nb_copy, resources = super(
+            NotebookLangExporter, self).from_notebook_node(
+                nb, resources, **kw)
+        if self.addSuffix:
+            # presumably read by nbconvert when naming the output file
+            resources['output_suffix'] = '_' + self.language
+
+        return nb_copy, resources
+
+
+
diff --git a/src/jupyter_contrib_nbextensions/nbextensions/nbTranslate/README.md b/src/jupyter_contrib_nbextensions/nbextensions/nbTranslate/README.md
@@ -0,0 +1,56 @@
+# nbTranslate -- helps translating notebooks -- provides multilanguage support
+
+This extension
+
+- helps convert markdown cells in a notebook from one language to another (optionally using **Google translate**),
+- makes it possible to selectively display the cells of a given language in a multilanguage notebook.
+
+Basically, the extension copies the original cell into a new one for editing and translating. Optionally, the cell source text can be passed through `google translate` and the result inserted in the new cell. Basic markdown structures (e.g. bold, emphasis, lists) are preserved/restored after conversion as far as possible, but this is not perfect, and one usually has to correct the text and structures afterward. Similarly, equations are extracted before conversion and restored in the result. It seems that when translating from lang1 to lang2, the best results are obtained by taking English as an intermediate language.
+
+A metadata entry indicating the language used is added to each cell. This makes it possible to selectively display the cells of a particular language and hide the other ones. Code cells, for their part, are always preserved. This way, one can get a kind of multilanguage notebook. A menu is provided to select the languages to display in the notebook.
+
+
+![](demo1.gif)
+
+![](demo2.gif)
+
+
+## Compatibility
+
+The extension has been written to play nicely with
+- [latex_envs]: LaTeX environments are protected before conversion and restored after. For environments with a text content, e.g. theorem, remark, etc., the content is still translated. Some minor updates have been applied to `latex_envs` to ensure the best compatibility; so update if necessary via
+```
+pip install jupyter_latex_envs --upgrade [--user|--sys-prefix]
+jupyter nbextension install --py latex_envs --user
+jupyter nbextension enable latex_envs --user --py
+```
+- [toc2]: cells of non-displayed languages are hidden and unrendered so that the toc corresponds only to the selected languages. The toc is automatically updated each time a language is added/removed.
+
+## Configuration
+- Parameter values can be changed using the `nbextensions-configurator`: it is possible to choose the initial source and target languages, to choose whether or not to use the google translate engine, to specify the initially displayed languages and the position of the language selection menu, and to define a keyboard shortcut.
+- A *configuration toolbar* is provided which makes it possible to change the main options per notebook. In the configuration toolbar, one can toggle the use of the google translate engine, select the source and target languages, and finally select the language to display.
+
+## Export 
+It is possible to extract one language from the multilanguage notebook. An exporter with an entry-point `selectLanguage` is provided that `converts` the notebook into another one as follows
+```
+jupyter nbconvert --to selectLanguage --NotebookLangExporter.language=lang  FILE.ipynb 
+```
+where lang is a valid language abbreviation, e.g. en, fr, ar, es, ... See the full list <a href='languages.js'>here</a>.
+
+
+Installation
+------------
+
+If you use [jupyter-contrib-nbextensions](https://github.com/ipython-contrib/jupyter_contrib_nbextensions), proceed as usual. 
+
+Otherwise, you can still install/try the extension from my personal repo, using
+```
+jupyter nbextension install https://rawgit.com/jfbercher/jupyter_nbTranslate/master/nbTranslate.zip --user
+jupyter nbextension enable nbTranslate/main
+```
+[Note that for now, installing from this repo neither installs the python module nor adds the entry points for exporting as described above]
+
+To remove
+```
+jupyter nbextension uninstall nbTranslate/main
+```
diff --git a/src/jupyter_contrib_nbextensions/nbextensions/nbTranslate/demo1.gif b/src/jupyter_contrib_nbextensions/nbextensions/nbTranslate/demo1.gif
diff --git a/src/jupyter_contrib_nbextensions/nbextensions/nbTranslate/demo2.gif b/src/jupyter_contrib_nbextensions/nbextensions/nbTranslate/demo2.gif