ocropus-archive · zuphilip · May 13, 2015 · May 13, 2015 · May 13, 2015 · May 13, 2015
diff --git a/ocrolib/__init__.py b/ocrolib/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, print_function
+
 __all__ = [
     "binnednn","cairoextras","common","components","dbtables",
     "fgen","gmmtree","gtkyield","hocr","lang","native",
@@ -8,6 +10,6 @@
 ### top level imports
 ################################################################
 
-import default
-from common import *
-from default import traceback as trace
+from . import default
+from .common import *
+from .default import traceback as trace
diff --git a/ocrolib/chars.py b/ocrolib/chars.py
@@ -1,12 +1,14 @@
 # -*- encoding: utf-8 -*-
 
+from __future__ import absolute_import, division, print_function
+
 import re
 
 # common character sets
 
 digits = u"0123456789"
 letters = u"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
-symbols = ur"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
+symbols = u"""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"""
 ascii = digits+letters+symbols
 
 xsymbols = u"""€¢£»«›‹÷©®†‡°∙•◦‣¶§÷¡¿▪▫"""
@@ -58,28 +60,28 @@
 
 def requote(s):
     s = unicode(s)
-    s = re.sub(ur"''",u'"',s)
+    s = re.sub(r"''",u'"',s)
     return s
 
 def requote_fancy(s,germanic=0):
     s = unicode(s)
     if germanic:
         # germanic quoting style reverses the shapes
         # straight double quotes
-        s = re.sub(ur"\s+''",u"”",s)
-        s = re.sub(u"''\s+",u"“",s)
-        s = re.sub(ur"\s+,,",u"„",s)
+        s = re.sub(r"\s+''",u"”",s)
+        s = re.sub(r"''\s+",u"“",s)
+        s = re.sub(r"\s+,,",u"„",s)
         # straight single quotes
-        s = re.sub(ur"\s+'",u"’",s)
-        s = re.sub(ur"'\s+",u"‘",s)
-        s = re.sub(ur"\s+,",u"‚",s)
+        s = re.sub(r"\s+'",u"’",s)
+        s = re.sub(r"'\s+",u"‘",s)
+        s = re.sub(r"\s+,",u"‚",s)
     else:
         # straight double quotes
-        s = re.sub(ur"\s+''",u"“",s)
-        s = re.sub(ur"''\s+",u"”",s)
-        s = re.sub(ur"\s+,,",u"„",s)
+        s = re.sub(r"\s+''",u"“",s)
+        s = re.sub(r"''\s+",u"”",s)
+        s = re.sub(r"\s+,,",u"„",s)
         # straight single quotes
-        s = re.sub(ur"\s+'",u"‘",s)
-        s = re.sub(ur"'\s+",u"’",s)
-        s = re.sub(ur"\s+,",u"‚",s)
+        s = re.sub(r"\s+'",u"‘",s)
+        s = re.sub(r"'\s+",u"’",s)
+        s = re.sub(r"\s+,",u"‚",s)
     return s
diff --git a/ocrolib/common.py b/ocrolib/common.py
@@ -3,6 +3,8 @@
 ### common functions for data structures, file name manipulation, etc.
 ################################################################
 
+from __future__ import absolute_import, division, print_function
+
 import os,os.path
 import re
 import numpy
@@ -13,14 +15,13 @@
 import glob
 from numpy import *
 from scipy.ndimage import morphology
-import ligatures
 import multiprocessing
-import lstm
 import pylab
-
 from pylab import imshow
-import morph
-from toplevel import *
+
+from . import ligatures
+from . import morph
+from .toplevel import *
 
 ################################################################
 ### exceptions
@@ -89,7 +90,7 @@ def _wrap(f):
         warned = 0
         def _wrapper(*args,**kw):
             if not warned:
-                print f,"has been DEPRECATED"
+                print(f, "has been DEPRECATED")
                 warned = 1
             return f(*args,**kw)
     return _wrap
@@ -100,7 +101,7 @@ def _wrapper(*args,**kw):
 # text normalization
 ################################################################
 
-import chars
+from . import chars
 replacements = chars.replacements
 
 def normalize_text(s):
@@ -109,10 +110,10 @@ def normalize_text(s):
     characters."""
     s = unicode(s)
     s = unicodedata.normalize('NFC',s)
-    s = re.sub(ur'\s+(?u)',' ',s)
-    s = re.sub(ur'\n(?u)','',s)
-    s = re.sub(ur'^\s+(?u)','',s)
-    s = re.sub(ur'\s+$(?u)','',s)
+    s = re.sub(r'(?u)\s+',' ',s)
+    s = re.sub(r'(?u)\n','',s)
+    s = re.sub(r'(?u)^\s+','',s)
+    s = re.sub(r'(?u)\s+$','',s)
     for m,r in replacements:
         s = re.sub(unicode(m),unicode(r),s)
     return s
@@ -121,23 +122,23 @@ def project_text(s,kind="exact"):
     """Project text onto a smaller subset of characters
     for comparison."""
     s = normalize_text(s)
-    s = re.sub(ur'( *[.] *){4,}',u'....',s) # dot rows
-    s = re.sub(ur'[~_]',u'',s) # dot rows
+    s = re.sub(r'( *[.] *){4,}',u'....',s) # dot rows
+    s = re.sub(r'[~_]',u'',s) # dot rows
     if kind=="exact":
         return s
     if kind=="nospace":
-        return re.sub(ur'\s','',s)
+        return re.sub(r'\s','',s)
     if kind=="spletdig":
-        return re.sub(ur'[^A-Za-z0-9 ]','',s)
+        return re.sub(r'[^A-Za-z0-9 ]','',s)
     if kind=="letdig":
-        return re.sub(ur'[^A-Za-z0-9]','',s)
+        return re.sub(r'[^A-Za-z0-9]','',s)
     if kind=="letters":
-        return re.sub(ur'[^A-Za-z]','',s)
+        return re.sub(r'[^A-Za-z]','',s)
     if kind=="digits":
-        return re.sub(ur'[^0-9]','',s)
+        return re.sub(r'[^0-9]','',s)
     if kind=="lnc":
         s = s.upper()
-        return re.sub(ur'[^A-Z]','',s)
+        return re.sub(r'[^A-Z]','',s)
     raise BadInput("unknown normalization: "+kind)
 
 ################################################################
@@ -222,7 +223,7 @@ def read_image_gray(fname,pageno=0):
     The optional page number allows images from files containing multiple
     images to be addressed.  Byte and short arrays are rescaled to
     the range 0...1 (unsigned) or -1...1 (signed)."""
-    if type(fname)==tuple: fname,pageno = fname
+    if isinstance(fname, tuple): fname,pageno = fname
     assert pageno==0
     pil = PIL.Image.open(fname)
     a = pil2array(pil)
@@ -248,7 +249,7 @@ def write_image_gray(fname,image,normalize=0,verbose=0):
     type, its values are clipped to the range [0,1],
     multiplied by 255 and converted to unsigned bytes.  Otherwise,
     the image must be of type unsigned byte."""
-    if verbose: print "# writing",fname
+    if verbose: print("# writing", fname)
     if isfloatarray(image):
         image = array(255*clip(image,0.0,1.0),'B')
     assert image.dtype==dtype('B'),"array has wrong dtype: %s"%image.dtype
@@ -259,7 +260,7 @@ def write_image_gray(fname,image,normalize=0,verbose=0):
 def read_image_binary(fname,dtype='i',pageno=0):
     """Read an image from disk and return it as a binary image
     of the given dtype."""
-    if type(fname)==tuple: fname,pageno = fname
+    if isinstance(fname, tuple): fname,pageno = fname
     assert pageno==0
     pil = PIL.Image.open(fname)
     a = pil2array(pil)
@@ -271,7 +272,7 @@ def write_image_binary(fname,image,verbose=0):
     """Write a binary image to disk. This verifies first that the given image
     is, in fact, binary.  The image may be of any type, but must consist of only
     two values."""
-    if verbose: print "# writing",fname
+    if verbose: print("# writing", fname)
     assert image.ndim==2
     image = array(255*(image>midrange(image)),'B')
     im = array2pil(image)
@@ -428,7 +429,7 @@ def bbox(self,i):
         """Return the bounding box in raster coordinates
         (row0,col0,row1,col1)."""
         r = self.objects[i]
-        # print "@@@bbox",i,r
+        # print("@@@bbox", i, r)
         return (r[0].start,r[1].start,r[0].stop,r[1].stop)
     def bboxMath(self,i):
         """Return the bounding box in math coordinates
@@ -442,7 +443,7 @@ def length(self):
     def mask(self,index,margin=0):
         """Return the mask for component index."""
         b = self.objects[index]
-        #print "@@@mask",index,b
+        # print("@@@mask", index, b)
         m = self.labels[b]
         m[m!=index] = 0
         if margin>0: m = pad_by(m,margin)
@@ -490,9 +491,10 @@ def save_object(fname,obj,zip=0):
 
 def unpickle_find_global(mname,cname):
     if mname=="lstm.lstm":
+        from . import lstm
         return getattr(lstm,cname)
-    if not mname in sys.modules.keys():
-        exec "import "+mname
+    if mname not in sys.modules:
+        __import__(mname)
     return getattr(sys.modules[mname],cname)
 
 def load_object(fname,zip=0,nofind=0,verbose=0):
@@ -502,7 +504,7 @@ class names that have changed."""
     if not nofind:
         fname = ocropus_find_file(fname)
     if verbose:
-        print "# loading object",fname
+        print("# loading object", fname)
     if zip==0 and fname.endswith(".gz"):
         zip = 1
     if zip>0:
@@ -572,30 +574,30 @@ def parallel_map(fun,jobs,parallel=0,chunksize=1):
 def check_valid_class_label(s):
     """Determines whether the given character is a valid class label.
     Control characters and spaces are not permitted."""
-    if type(s)==unicode:
+    if isinstance(s, unicode):
         if re.search(r'[\0-\x20]',s):
             raise BadClassLabel(s)
-    elif type(s)==str:
+    elif isinstance(s, str):
         if re.search(r'[^\x21-\x7e]',s):
             raise BadClassLabel(s)
     else:
         raise BadClassLabel(s)
 
 def summary(x):
     """Summarize a datatype as a string (for display and debugging)."""
-    if type(x)==numpy.ndarray:
+    if isinstance(x, numpy.ndarray):
         return "<ndarray %s %s>"%(x.shape,x.dtype)
-    if type(x)==str and len(x)>10:
+    if isinstance(x, str) and len(x)>10:
         return '"%s..."'%x
-    if type(x)==list and len(x)>10:
+    if isinstance(x, list) and len(x)>10:
         return '%s...'%x
     return str(x)
 
 ################################################################
 ### file name manipulation
 ################################################################
 
-from default import getlocal
+from .default import getlocal
 
 
 @checks(str,_=str)
@@ -636,7 +638,7 @@ def base(path):
 def write_text_simple(file,s):
     """Write the given string s to the output file."""
     with open(file,"w") as stream:
-        if type(s)==unicode: s = s.encode("utf-8")
+        if isinstance(s, unicode): s = s.encode("utf-8")
         stream.write(s)
 
 @checks([str])
@@ -839,8 +841,8 @@ def pyconstruct(s):
     path = s[:s.find("(")]
     if "." in path:
         module = path[:path.rfind(".")]
-        print "import",module
-        exec "import "+module in env
+        print("import", module)
+        exec("import "+module, env)
     return eval(s,env)
 
 def mkpython(name):
@@ -849,7 +851,7 @@ def mkpython(name):
     doesn't look like a Python class."""
     if name is None or len(name)==0:
         return None
-    elif type(name) is not str:
+    elif not isinstance(name, str):
         return name()
     elif name[0]=="=":
         return pyconstruct(name[1:])
@@ -896,15 +898,15 @@ def save_component(file,object,verbose=0,verify=0):
         ocropus.save_component(file,object)
         return
     if verbose:
-        print "[save_component]"
+        print("[save_component]")
     if verbose:
         for k,v in object.__dict__.items():
-            print ":",k,obinfo(v)
+            print(":", k, obinfo(v))
     with open(file,"wb") as stream:
         pickle.dump(object,stream,pickle_mode)
     if verify:
         if verbose:
-            print "[trying to read it again]"
+            print("[trying to read it again]")
         with open(file,"rb") as stream:
             pickle.load(stream)
 
@@ -961,7 +963,7 @@ def draw_aligned(result,axis=None):
         axis = subplot(111)
     axis.imshow(NI(result.image),cmap=cm.gray)
     cseg = result.cseg
-    if type(cseg)==numpy.ndarray: cseg = common.lseg2narray(cseg)
+    if isinstance(cseg, numpy.ndarray): cseg = common.lseg2narray(cseg)
     ocropy.make_line_segmentation_black(cseg)
     ocropy.renumber_labels(cseg,1)
     bboxes = ocropy.rectarray()

diff --git a/ocrolib/default.py b/ocrolib/default.py
@@ -1,5 +1,7 @@
 # the defaults used by the recognizer
 
+from __future__ import absolute_import, division, print_function
+
 import os
 
 modeldir = "/usr/local/share/ocropus/"

diff --git a/ocrolib/edist.py b/ocrolib/edist.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, print_function
+
 from scipy.ndimage import filters
 from pylab import *
 import re

diff --git a/ocrolib/extras/cairoextras.py b/ocrolib/extras/cairoextras.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, print_function
+
 import ctypes
 import cairo
 
@@ -25,7 +27,7 @@ def create_cairo_font_face_for_file(filename, faceindex=0, loadoptions=0):
         # initialize freetype
         _ft_lib = ctypes.c_void_p()
         if FT_Err_Ok != _freetype_so.FT_Init_FreeType(ctypes.byref(_ft_lib)):
-          raise "Error initialising FreeType library."
+          raise RuntimeError("Error initialising FreeType library.")
         _surface = cairo.ImageSurface(cairo.FORMAT_A8, 0, 0)
         _initialized = True
     # create freetype face
@@ -34,14 +36,14 @@ def create_cairo_font_face_for_file(filename, faceindex=0, loadoptions=0):
     cairo_t = PycairoContext.from_address(id(cairo_ctx)).ctx
     _cairo_so.cairo_ft_font_face_create_for_ft_face.restype = ctypes.c_void_p
     if FT_Err_Ok != _freetype_so.FT_New_Face(_ft_lib, filename, faceindex, ctypes.byref(ft_face)):
-        raise "Error creating FreeType font face for " + filename
+        raise RuntimeError("Error creating FreeType font face for " + filename)
     # create cairo font face for freetype face
     cr_face = _cairo_so.cairo_ft_font_face_create_for_ft_face(ft_face, loadoptions)
     if CAIRO_STATUS_SUCCESS != _cairo_so.cairo_font_face_status(cr_face):
-        raise "Error creating cairo font face for " + filename
+        raise RuntimeError("Error creating cairo font face for " + filename)
     _cairo_so.cairo_set_font_face(cairo_t, cr_face)
     if CAIRO_STATUS_SUCCESS != _cairo_so.cairo_status(cairo_t):
-        raise "Error creating cairo font face for " + filename
+        raise RuntimeError("Error creating cairo font face for " + filename)
     face = cairo_ctx.get_font_face()
     return face