python · fgallaire · Feb 14, 2017 · Feb 14, 2017 · Feb 14, 2017 · Feb 14, 2017
diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst
@@ -117,6 +117,28 @@ functions should be good enough; otherwise, you should use an instance of
    .. versionadded:: 3.3
 
 
+.. function:: cjk_wide(char)
+
+   Return ``True`` if *char* is Fullwidth or Wide, ``False`` otherwise.
+   Fullwidth and Wide CJK chars are double-width.
+
+   .. versionadded:: 3.7
+
+
+.. function:: cjk_len(text)
+
+   Return the real width of *text* (its len if not a string).
+
+   .. versionadded:: 3.7
+
+
+.. function:: cjk_slices(text, index)
+
+   Return the two slices of *text* cut to *index*.
+
+   .. versionadded:: 3.7
+
+
 :func:`wrap`, :func:`fill` and :func:`shorten` work by creating a
 :class:`TextWrapper` instance and calling a single method on it.  That
 instance is not reused, so for applications that process many text
@@ -276,6 +298,13 @@ hyphenated words; only then will long words be broken if necessary, unless
       .. versionadded:: 3.4
 
 
+   .. attribute:: cjk
+
+      (default: ``False``) Handle double-width CJK chars.
+
+      .. versionadded:: 3.7
+
+
    :class:`TextWrapper` also provides some public methods, analogous to the
    module-level convenience functions:
 

diff --git a/Lib/idlelib/idle_test/test_calltips.py b/Lib/idlelib/idle_test/test_calltips.py
@@ -72,7 +72,7 @@ def test_signature_wrap(self):
 (width=70, initial_indent='', subsequent_indent='', expand_tabs=True,
     replace_whitespace=True, fix_sentence_endings=False, break_long_words=True,
     drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None,
-    placeholder=' [...]')''')
+    placeholder=' [...]', cjk=False)''')
 
     def test_docline_truncation(self):
         def f(): pass

diff --git a/Lib/test/test_textwrap.py b/Lib/test/test_textwrap.py
@@ -566,6 +566,10 @@ def setUp(self):
         self.text = '''\
 Did you say "supercalifragilisticexpialidocious?"
 How *do* you spell that odd word, anyways?
+'''
+        self.text_cjk = '''\
+Did you say "いろはにほへとちりぬるをいろはにほ?"
+How りぬ るをいろはにほり ぬるは, anyways?
 '''
 
     def test_break_long(self):
@@ -579,6 +583,14 @@ def test_break_long(self):
         self.check_wrap(self.text, 50,
                         ['Did you say "supercalifragilisticexpialidocious?"',
                          'How *do* you spell that odd word, anyways?'])
+        self.check_wrap(self.text_cjk, 30,
+                        ['Did you say "いろはにほへとち',
+                         'りぬるをいろはにほ?" How りぬ',
+                         'るをいろはにほり ぬるは,',
+                         'anyways?'], cjk=True)
+        self.check_wrap(self.text_cjk, 50,
+                        ['Did you say "いろはにほへとちりぬるをいろはにほ?"',
+                         'How りぬ るをいろはにほり ぬるは, anyways?'], cjk=True)
 
         # SF bug 797650.  Prevent an infinite loop by making sure that at
         # least one character gets split off on every pass.

diff --git a/Lib/textwrap.py b/Lib/textwrap.py
@@ -3,11 +3,13 @@
 
 # Copyright (C) 1999-2001 Gregory P. Ward.
 # Copyright (C) 2002, 2003 Python Software Foundation.
+# Copyright (C) 2015-2017 Florent Gallaire <[email protected]>
 # Written by Greg Ward <[email protected]>
 
 import re
 
-__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']
+__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten',
+           'cjk_wide', 'cjk_len', 'cjk_slices']
 
 # Hardcode the recognized whitespace characters to the US-ASCII
 # whitespace characters.  The main reason for doing this is that
@@ -61,6 +63,8 @@ class TextWrapper:
         Truncate wrapped lines.
       placeholder (default: ' [...]')
         Append to the last line of truncated text.
+      cjk (default: false)
+        Handle double-width CJK chars.
     """
 
     unicode_whitespace_trans = {}
@@ -125,7 +129,8 @@ def __init__(self,
                  tabsize=8,
                  *,
                  max_lines=None,
-                 placeholder=' [...]'):
+                 placeholder=' [...]',
+                 cjk=False):
         self.width = width
         self.initial_indent = initial_indent
         self.subsequent_indent = subsequent_indent
@@ -138,7 +143,9 @@ def __init__(self,
         self.tabsize = tabsize
         self.max_lines = max_lines
         self.placeholder = placeholder
+        self.cjk = cjk
 
+        self._width = cjk_len if self.cjk else len
 
     # -- Private methods -----------------------------------------------
     # (possibly useful for subclasses to override)
@@ -215,8 +222,13 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
         # If we're allowed to break long words, then do so: put as much
         # of the next chunk onto the current line as will fit.
         if self.break_long_words:
-            cur_line.append(reversed_chunks[-1][:space_left])
-            reversed_chunks[-1] = reversed_chunks[-1][space_left:]
+            if self.cjk:
+                chunk_start, chunk_end = cjk_slices(reversed_chunks[-1], space_left)
+                cur_line.append(chunk_start)
+                reversed_chunks[-1] = chunk_end
+            else:
+                cur_line.append(reversed_chunks[-1][:space_left])
+                reversed_chunks[-1] = reversed_chunks[-1][space_left:]
 
         # Otherwise, we have to preserve the long word intact.  Only add
         # it to the current line if there's nothing already there --
@@ -246,6 +258,9 @@ def _wrap_chunks(self, chunks):
         lines = []
         if self.width <= 0:
             raise ValueError("invalid width %r (must be > 0)" % self.width)
+        elif self.width == 1 and (sum(self._width(chunk) for chunk in chunks) >
+                                  sum(len(chunk) for chunk in chunks)):
+            raise ValueError("invalid width 1 (must be > 1 when CJK chars)")
         if self.max_lines is not None:
             if self.max_lines > 1:
                 indent = self.subsequent_indent
@@ -280,7 +295,7 @@ def _wrap_chunks(self, chunks):
                 del chunks[-1]
 
             while chunks:
-                l = len(chunks[-1])
+                l = self._width(chunks[-1])
 
                 # Can at least squeeze this chunk onto the current line.
                 if cur_len + l <= width:
@@ -293,7 +308,7 @@ def _wrap_chunks(self, chunks):
 
             # The current line is full, and the next chunk is too big to
             # fit on *any* line (not just this one).
-            if chunks and len(chunks[-1]) > width:
+            if chunks and self._width(chunks[-1]) > width:
                 self._handle_long_word(chunks, cur_line, cur_len, width)
                 cur_len = sum(map(len, cur_line))
 
@@ -365,7 +380,7 @@ def fill(self, text):
 
 # -- Convenience interface ---------------------------------------------
 
-def wrap(text, width=70, **kwargs):
+def wrap(text, width=70, cjk=False, **kwargs):
     """Wrap a single paragraph of text, returning a list of wrapped lines.
 
     Reformat the single paragraph in 'text' so it fits in lines of no
@@ -375,10 +390,10 @@ def wrap(text, width=70, **kwargs):
     space.  See TextWrapper class for available keyword args to customize
     wrapping behaviour.
     """
-    w = TextWrapper(width=width, **kwargs)
+    w = TextWrapper(width=width, cjk=cjk, **kwargs)
     return w.wrap(text)
 
-def fill(text, width=70, **kwargs):
+def fill(text, width=70, cjk=False, **kwargs):
     """Fill a single paragraph of text, returning a new string.
 
     Reformat the single paragraph in 'text' to fit in lines of no more
@@ -387,10 +402,10 @@ def fill(text, width=70, **kwargs):
     whitespace characters converted to space.  See TextWrapper class for
     available keyword args to customize wrapping behaviour.
     """
-    w = TextWrapper(width=width, **kwargs)
+    w = TextWrapper(width=width, cjk=cjk, **kwargs)
     return w.fill(text)
 
-def shorten(text, width, **kwargs):
+def shorten(text, width, cjk=False, **kwargs):
     """Collapse and truncate the given text to fit in the given width.
 
     The text first has its whitespace collapsed.  If it then fits in
@@ -402,10 +417,43 @@ def shorten(text, width, **kwargs):
         >>> textwrap.shorten("Hello  world!", width=11)
         'Hello [...]'
     """
-    w = TextWrapper(width=width, max_lines=1, **kwargs)
+    w = TextWrapper(width=width, cjk=cjk, max_lines=1, **kwargs)
     return w.fill(' '.join(text.strip().split()))
 
 
+# -- CJK support ------------------------------------------------------
+
+def cjk_wide(char):
+    """Return True if char is Fullwidth or Wide, False otherwise.
+    Fullwidth and Wide CJK chars are double-width.
+    """
+    import unicodedata
+    return unicodedata.east_asian_width(char) in ('F', 'W')
+
+
+def cjk_len(text):
+    """Return the real width of text (its len if not a string).
+    """
+    if not isinstance(text, str):
+        return len(text)
+    return sum(2 if cjk_wide(char) else 1 for char in text)
+
+
+def cjk_slices(text, index):
+    """Return the two slices of text cut to index.
+    """
+    if not isinstance(text, str):
+        return text[:index], text[index:]
+    if cjk_len(text) <= index:
+        return text, ''
+    width = 0
+    for i, char in enumerate(text):
+        width = width + cjk_wide(char) + 1
+        if width > index:
+            break
+    return text[:i], text[i:]
+
+
 # -- Loosely related functionality -------------------------------------
 
 _whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)

diff --git a/Misc/ACKS b/Misc/ACKS
@@ -495,6 +495,7 @@ Lele Gaifax
 Santiago Gala
 Yitzchak Gale
 Matthew Gallagher
+Florent Gallaire
 Quentin Gallet-Gilles
 Riccardo Attilio Galli
 Raymund Galvin