RDFLib · joernhees · Nov 20, 2014 · Sep 13, 2014 · Sep 13, 2014 · Nov 17, 2014
diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py
@@ -308,9 +308,9 @@ def unicodeExpand(m):
         raise Exception("Invalid unicode code point: " + m.group(1))
 
 unicodeEscape4 = re.compile(
-    r'\\u([0-9a-f]{4})', flags=re.I)
+    r'\\u([0-9a-fA-F]{4})')  # no re.I: it would also let the literal 'u' match an uppercase 'U'
 unicodeEscape8 = re.compile(
-    r'\\U([0-9a-f]{8})', flags=re.I)
+    r'\\U([0-9a-fA-F]{8})')  # no re.I: it would also let the literal 'U' match a lowercase 'u'
 
 
 
@@ -1581,23 +1581,23 @@ def strconst(self, argstr, i, delim):
         self.BadSyntax(argstr, i,
                         "unterminated string literal")
 
-    def _unicodeEscape(self, argstr, i, startline, reg, n):
+    def _unicodeEscape(self, argstr, i, startline, reg, n, prefix):  # prefix: escape letter; callers pass 'u' or 'U'
         if len(argstr)<i+n:
             raise BadSyntax(
                     self._thisDoc, startline, argstr, i,
                     "unterminated string literal(3)")
         try:
-            return i+n, reg.sub(unicodeExpand, '\\u'+argstr[i:i+n])
+            return i+n, reg.sub(unicodeExpand, '\\'+prefix+argstr[i:i+n])  # re-attach backslash + letter so reg sees the whole escape
         except:
             raise BadSyntax(
                 self._thisDoc, startline, argstr, i,
                 "bad string literal hex escape: "+argstr[i:i+n])
 
     def uEscape(self, argstr, i, startline):
-        return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4)
+        return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, 'u')  # lowercase \u: consume 4 hex digits
 
     def UEscape(self, argstr, i, startline):
-        return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8 )
+        return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, 'U')  # uppercase \U: consume 8 hex digits
 
     def BadSyntax(self, argstr, i, msg):
         raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg)

diff --git a/test/test_turtle_serialize.py b/test/test_turtle_serialize.py
@@ -40,6 +40,22 @@ def testTurtleBoolList():
     assert bool_list == [True, False, True]
 
 
+def testUnicodeEscaping():
+    """IRIs with 4-digit (u) and 8-digit (U) escapes parse and decode, in either hex case."""
+    # Doubled backslashes keep the escapes literal on Python 3 too, so the parser itself decodes them.
+    turtle_string = " <http://example.com/A> <http://example.com/B> <http://example.com/aaa\\u00F3bbbb> . <http://example.com/A> <http://example.com/C> <http://example.com/zzz\\U00100000zzz> . <http://example.com/A> <http://example.com/D> <http://example.com/aaa\\u00f3bbb> ."
+    g = Graph()
+    # shouldn't get an exception
+    g.parse(data=turtle_string, format="turtle")
+    triples = sorted(g)
+    assert len(triples) == 3
+    # Now check that was decoded into python values properly
+    assert triples[0][2] == URIRef(u'http://example.com/aaa\xf3bbbb')
+    assert triples[1][2] == URIRef(u'http://example.com/zzz\U00100000zzz')
+    assert triples[2][2] == URIRef(u'http://example.com/aaa\xf3bbb')
+
+
+
 if __name__ == "__main__":
     import nose, sys
     nose.main(defaultTest=sys.argv[0])