Skip to content

Commit

Permalink
Merge pull request #838 from jfbercher/nbTranslate
Browse files Browse the repository at this point in the history
nbTranslate
  • Loading branch information
jcb91 authored Jan 13, 2017
2 parents e7a805b + 94fc88c commit 6baf2f9
Show file tree
Hide file tree
Showing 12 changed files with 1,687 additions and 2 deletions.
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def main():
'jupyter_contrib_core >=0.3',
'jupyter_core',
'jupyter_highlight_selected_word >=0.0.5',
'jupyter_latex_envs >=1.3.4',
'jupyter_latex_envs >=1.3.6',
'jupyter_nbextensions_configurator',
'nbconvert',
'notebook >=4.0',
Expand Down Expand Up @@ -97,6 +97,7 @@ def main():
],
'nbconvert.exporters': [
'html_toc = jupyter_contrib_nbextensions.nbconvert_support.toc2:TocExporter', # noqa: E501
'selectLanguage = jupyter_contrib_nbextensions.nbconvert_support.nbTranslate:NotebookLangExporter', # noqa: E501
'html_embed = jupyter_contrib_nbextensions.nbconvert_support.embedhtml:EmbedHTMLExporter', # noqa: E501
],
},
Expand All @@ -116,6 +117,5 @@ def main():
],
)


if __name__ == '__main__':
main()
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .pre_pymarkdown import PyMarkdownPreprocessor
from .pre_svg2pdf import SVG2PDFPreprocessor
from .toc2 import TocExporter
from .nbTranslate import NotebookLangExporter

__all__ = [
'CodeFoldingPreprocessor',
Expand All @@ -20,6 +21,7 @@
'SVG2PDFPreprocessor',
'templates_directory',
'TocExporter',
'NotebookLangExporter'
]


Expand Down
255 changes: 255 additions & 0 deletions src/jupyter_contrib_nbextensions/nbconvert_support/nbTranslate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
# -*- coding: utf-8 -*-

"""nbTranslate Preprocessor Exporter class"""

# -----------------------------------------------------------------------------
# Copyright (c) 2016, J.-F. Bercher
#
# Distributed under the terms of the Modified BSD License.
#
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------

from __future__ import print_function

# Stdlib imports
import os
import re

# IPython imports
from IPython.display import HTML, display
from nbconvert.preprocessors import Preprocessor
from traitlets import Bool, Dict, Any, Unicode, Enum, Int, CaselessStrEnum
from traitlets.config import Config

from nbconvert.exporters.exporter import Exporter
from nbconvert.exporters.notebook import NotebookExporter
import nbformat


# -----------------------------------------------------------------------------
# Preprocessor
# -----------------------------------------------------------------------------

# Mapping of language abbreviation -> human-readable language name.
# The keys are the only values accepted for ``NotebookLangExporter.language``;
# the names are only used when reporting an invalid abbreviation.
#
# NOTE(review): a few entries look unusual and should be confirmed against the
# language list used by the notebook front end before being changed, since the
# keys must match the ``lang`` metadata written into the cells:
#   - 'ma' is used for Punjabi (the ISO 639-1 code is 'pa');
#   - 'su' is labelled 'Sudanese' (ISO 639-1 'su' is Sundanese);
#   - 'iw' / 'jw' are legacy codes for Hebrew / Javanese.
langs = {
    'auto': 'Automatic',
    'af': 'Afrikaans',
    'sq': 'Albanian',
    'ar': 'Arabic',
    'hy': 'Armenian',
    'az': 'Azerbaijani',
    'eu': 'Basque',
    'be': 'Belarusian',
    'bn': 'Bengali',
    'bs': 'Bosnian',
    'bg': 'Bulgarian',
    'ca': 'Catalan',
    'ceb': 'Cebuano',
    'ny': 'Chichewa',
    'zh-cn': 'Chinese Simplified',
    'zh-tw': 'Chinese Traditional',
    'co': 'Corsican',
    'hr': 'Croatian',
    'cs': 'Czech',
    'da': 'Danish',
    'nl': 'Dutch',
    'en': 'English',
    'eo': 'Esperanto',
    'et': 'Estonian',
    'tl': 'Filipino',
    'fi': 'Finnish',
    'fr': 'French',
    'fy': 'Frisian',
    'gl': 'Galician',
    'ka': 'Georgian',
    'de': 'German',
    'el': 'Greek',
    'gu': 'Gujarati',
    'ht': 'Haitian Creole',
    'ha': 'Hausa',
    'haw': 'Hawaiian',
    'iw': 'Hebrew',
    'hi': 'Hindi',
    'hmn': 'Hmong',
    'hu': 'Hungarian',
    'is': 'Icelandic',
    'ig': 'Igbo',
    'id': 'Indonesian',
    'ga': 'Irish',
    'it': 'Italian',
    'ja': 'Japanese',
    'jw': 'Javanese',
    'kn': 'Kannada',
    'kk': 'Kazakh',
    'km': 'Khmer',
    'ko': 'Korean',
    'ku': 'Kurdish (Kurmanji)',
    'ky': 'Kyrgyz',
    'lo': 'Lao',
    'la': 'Latin',
    'lv': 'Latvian',
    'lt': 'Lithuanian',
    'lb': 'Luxembourgish',
    'mk': 'Macedonian',
    'mg': 'Malagasy',
    'ms': 'Malay',
    'ml': 'Malayalam',
    'mt': 'Maltese',
    'mi': 'Maori',
    'mr': 'Marathi',
    'mn': 'Mongolian',
    'my': 'Myanmar (Burmese)',
    'ne': 'Nepali',
    'no': 'Norwegian',
    'ps': 'Pashto',
    'fa': 'Persian',
    'pl': 'Polish',
    'pt': 'Portuguese',
    'ma': 'Punjabi',
    'ro': 'Romanian',
    'ru': 'Russian',
    'sm': 'Samoan',
    'gd': 'Scots Gaelic',
    'sr': 'Serbian',
    'st': 'Sesotho',
    'sn': 'Shona',
    'sd': 'Sindhi',
    'si': 'Sinhala',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'so': 'Somali',
    'es': 'Spanish',
    'su': 'Sudanese',
    'sw': 'Swahili',
    'sv': 'Swedish',
    'tg': 'Tajik',
    'ta': 'Tamil',
    'te': 'Telugu',
    'th': 'Thai',
    'tr': 'Turkish',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'uz': 'Uzbek',
    'vi': 'Vietnamese',
    'cy': 'Welsh',
    'xh': 'Xhosa',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'zu': 'Zulu',
}

class nbTranslatePreprocessor(Preprocessor):
    """Preprocessor that keeps only one language of a multilanguage notebook.

    Markdown cells whose ``metadata['lang']`` differs from the selected
    language are dropped. Markdown cells without a ``lang`` entry, and all
    non-markdown cells (e.g. code cells), are always kept.
    """

    def __init__(self, lang='en', **kw):
        """Create the preprocessor.

        Parameters
        ----------
        lang : str
            Language abbreviation of the markdown cells to keep.
        **kw
            Forwarded to the ``Preprocessor`` initialiser (e.g. ``config``,
            ``parent``, ``enabled``).
        """
        # Run the base initialiser so that the keyword arguments are actually
        # applied instead of being silently discarded.
        super(nbTranslatePreprocessor, self).__init__(**kw)
        self.language = lang

    def __call__(self, nb, resources, lang='en'):
        """Apply the preprocessor if it is enabled.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted.
        resources : dictionary
            Additional resources used in the conversion process.
        lang : str
            Unused; kept for backward compatibility of the signature. The
            language given to the constructor is the one that is applied.

        Returns
        -------
        (nb, resources), filtered when ``self.enabled`` is true and returned
        untouched otherwise.
        """
        if not self.enabled:
            return nb, resources
        self.log.debug("Applying preprocessor: %s",
                       self.__class__.__name__)
        return self.preprocess(nb, resources)

    def preprocess(self, nb, resources):
        """Remove the markdown cells that are not in the selected language.

        ``nb.cells`` is replaced in place by the filtered list, then the
        notebook is handed to the base class ``preprocess``.

        Parameters
        ----------
        nb : NotebookNode
            Notebook being converted
        resources : dictionary
            Additional resources used in the conversion process. Allows
            preprocessors to pass variables into the Jinja engine.

        Returns
        -------
        (nb, resources) as returned by the base class ``preprocess``.
        """
        selected = self.language
        # A markdown cell without a 'lang' entry defaults to the selected
        # language, i.e. untagged markdown cells are always kept.
        nb.cells = [
            cell for cell in nb.cells
            if cell.cell_type != 'markdown' or
            cell.get('metadata', {}).get('lang', selected) == selected
        ]
        return super(nbTranslatePreprocessor, self).preprocess(nb, resources)

    def preprocess_cell(self, cell, resources, index):
        """Return the cell unchanged; all the work is done in ``preprocess``.

        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process. Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed (see base.py)
        """
        return cell, resources

# ----------------------------------------------------------------
# Exporter
# ----------------------------------------------------------------

class NotebookLangExporter(NotebookExporter):
    """Export a multilanguage notebook to a notebook in a single language.

    Markdown cells tagged with a language other than ``language`` are
    removed; the result is written as a regular ``.ipynb`` notebook.
    """

    nbformat_version = Enum(list(nbformat.versions),
                            default_value=nbformat.current_nbformat,
                            config=True,
                            help="The nbformat version to write.\n"
                                 "Use this to downgrade notebooks.\n"
                            )

    # Abbreviation of the language to keep; validated against ``langs`` at
    # export time rather than by the trait itself.
    language = Unicode('en', shortname="rh",
                       help="Selected language").tag(config=True)

    # Tagged as configurable so that it can be set from the command line or
    # from a config file, like ``language``.
    addSuffix = Bool(True,
                     help="Use language tag as suffix").tag(config=True)

    def _file_extension_default(self):
        """Return the extension of the exported file."""
        return '.ipynb'

    output_mimetype = 'application/json'

    def from_notebook_node(self, nb, resources=None, **kw):
        """Export ``nb`` keeping only the cells of the selected language.

        Parameters
        ----------
        nb : NotebookNode
            Notebook to export. It is not modified.
        resources : dictionary, optional
            Additional resources used in the conversion process.
        **kw
            Forwarded to ``NotebookExporter.from_notebook_node``.

        Returns
        -------
        (output, resources) as returned by the parent exporter;
        ``resources['output_suffix']`` is set to ``'_' + language`` when
        ``addSuffix`` is true.

        Raises
        ------
        ValueError
            If ``language`` is not one of the keys of ``langs``.
        """
        import copy

        if self.language not in langs:
            raise ValueError(
                "Error -- {} is not a valid language abbreviation\n"
                "Please select one of the abbreviations in the list\n {}"
                .format(self.language, langs))

        # The filter is applied directly instead of being registered on the
        # exporter: registering a new instance on every call would either
        # accumulate preprocessors or, if the preprocessor list is rebuilt
        # afterwards, throw away the ones registered by the caller.
        language_filter = nbTranslatePreprocessor(lang=self.language)
        language_filter.enabled = True

        # The filter replaces ``nb.cells`` in place, so work on a copy to
        # leave the caller's notebook untouched.
        nb, resources = language_filter(copy.deepcopy(nb), resources)

        output, resources = super(
            NotebookLangExporter, self).from_notebook_node(nb, resources, **kw)
        if self.addSuffix:
            resources['output_suffix'] = '_' + self.language

        return output, resources



Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# nbTranslate -- helps translating notebooks -- provides multilanguage support

This extension

- helps convert the markdown cells of a notebook from one language to another (optionally using **Google translate**),
- makes it possible to selectively display the cells of a given language in a multilanguage notebook.

Basically, the extension copies the original cell into a new one for editing and translating. Optionally, the cell source text can be passed through `google translate` and the result inserted in the new cell. Basic markdown structures (e.g. bold, emphasis, lists) are preserved/restored after conversion as far as possible, but this is not perfect, and one usually has to correct the text and structures afterward. Similarly, equations are extracted before conversion and restored in the result. It seems that when translating from lang1 to lang2, the best results are obtained by using English as an intermediate language.

A metadata entry indicating the language used is added to each cell. This makes it possible to selectively display the cells of a particular language and hide the others. Code cells, for their part, are always preserved. This way, one gets a kind of multilanguage notebook. A menu is provided to select the languages to display in the notebook.


![](demo1.gif)

![](demo2.gif)


## Compatibility

The extension has been written to play nicely with
- [latex_envs]: LaTeX environments are protected before conversion and restored afterward. For environments with text content, e.g. theorem, remark, etc., the content is still translated. Some minor updates have been applied to `latex_envs` to ensure the best compatibility, so update it if necessary via
```
pip install jupyter_latex_envs --upgrade [--user|sys-prefix]
jupyter nbextension install --py latex_envs --user
jupyter nbextension enable latex_envs --user --py
```
- [toc2]: cells of non-displayed languages are hidden and unrendered so that the toc corresponds only to the selected languages. The toc is automatically updated each time a language is added/removed.

## Configuration
- Parameter values can be changed using the `nbextensions-configurator`: it is possible to choose the initial source and target languages, to choose whether or not to use the google translate engine, to specify the initially displayed languages and the position of the language selection menu, and to define a keyboard shortcut.
- A *configuration toolbar* is provided which makes it possible to change the main options per notebook. In the configuration toolbar, one can toggle the use of the google translate engine, select the source and target languages, and finally select the languages to display.

## Export
It is possible to extract one language from the multilanguage notebook. An exporter with an entry-point `selectLanguage` is provided that `converts` the notebook into another one as follows
```
jupyter nbconvert --to selectLanguage --NotebookLangExporter.language=lang FILE.ipynb
```
where `lang` is a valid language abbreviation, e.g. en, fr, ar, es, ... See the full list <a href='languages.js'>here</a>.


Installation
------------

If you use [jupyter-contrib-nbextensions](https://github.com/ipython-contrib/jupyter_contrib_nbextensions), proceed as usual.

Otherwise, you can still install/try the extension from my personal repo, using
```
jupyter nbextension install https://rawgit.com/jfbercher/jupyter_nbTranslate/master/nbTranslate.zip --user
jupyter nbextension enable nbTranslate/main
```
[Note that, for now, installing from this repo neither installs the python module nor adds the entry points needed for exporting as described above.]

To remove
```
jupyter nbextension uninstall nbTranslate/main
```
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 6baf2f9

Please sign in to comment.