From 73217058727d46d812e2e0662809f033b4ee37e1 Mon Sep 17 00:00:00 2001 From: Rory McCann Date: Sat, 13 Sep 2014 19:53:50 +0100 Subject: [PATCH 1/3] Correctly handle \u in Notation 3 files. It was using case insensitive regex, so it was mistaking \u and \U. --- rdflib/plugins/parsers/notation3.py | 4 ++-- test/test_turtle_serialize.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 966eea068..2fa4fba5f 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -308,9 +308,9 @@ def unicodeExpand(m): raise Exception("Invalid unicode code point: " + m.group(1)) unicodeEscape4 = re.compile( - r'\\u([0-9a-f]{4})', flags=re.I) + r'\\u([0-9a-fA-F]{4})') unicodeEscape8 = re.compile( - r'\\U([0-9a-f]{8})', flags=re.I) + r'\\U([0-9a-fA-F]{8})') diff --git a/test/test_turtle_serialize.py b/test/test_turtle_serialize.py index 13cdee66f..bc10ada9f 100644 --- a/test/test_turtle_serialize.py +++ b/test/test_turtle_serialize.py @@ -40,6 +40,22 @@ def testTurtleBoolList(): assert bool_list == [True, False, True] +def testUnicodeEscaping(): + turtle_string = " . . ." + g = Graph() + + # shouldn't get an exception + g.parse(data=turtle_string, format="turtle") + triples = sorted(list(g)) + assert len(triples) == 3 + print triples + # Now check that was decoded into python values properly + assert triples[0][2] == URIRef(u'http://example.com/aaa\xf3bbbb') + assert triples[1][2] == URIRef(u'http://example.com/zzz\U00100000zzz') + assert triples[2][2] == URIRef(u'http://example.com/aaa\xf3bbb') + + + if __name__ == "__main__": import nose, sys nose.main(defaultTest=sys.argv[0]) From 7f140d2e3034d9c479766ad61839df92d62bbfc6 Mon Sep 17 00:00:00 2001 From: Rory McCann Date: Sat, 13 Sep 2014 19:53:50 +0100 Subject: [PATCH 2/3] Correctly handle \u in Notation 3 files. It was using case insensitive regex, so it was mistaking \u and \U. 
--- rdflib/plugins/parsers/notation3.py | 4 ++-- test/test_turtle_serialize.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 966eea068..2fa4fba5f 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -308,9 +308,9 @@ def unicodeExpand(m): raise Exception("Invalid unicode code point: " + m.group(1)) unicodeEscape4 = re.compile( - r'\\u([0-9a-f]{4})', flags=re.I) + r'\\u([0-9a-fA-F]{4})') unicodeEscape8 = re.compile( - r'\\U([0-9a-f]{8})', flags=re.I) + r'\\U([0-9a-fA-F]{8})') diff --git a/test/test_turtle_serialize.py b/test/test_turtle_serialize.py index 13cdee66f..bc10ada9f 100644 --- a/test/test_turtle_serialize.py +++ b/test/test_turtle_serialize.py @@ -40,6 +40,22 @@ def testTurtleBoolList(): assert bool_list == [True, False, True] +def testUnicodeEscaping(): + turtle_string = " . . ." + g = Graph() + + # shouldn't get an exception + g.parse(data=turtle_string, format="turtle") + triples = sorted(list(g)) + assert len(triples) == 3 + print triples + # Now check that was decoded into python values properly + assert triples[0][2] == URIRef(u'http://example.com/aaa\xf3bbbb') + assert triples[1][2] == URIRef(u'http://example.com/zzz\U00100000zzz') + assert triples[2][2] == URIRef(u'http://example.com/aaa\xf3bbb') + + + if __name__ == "__main__": import nose, sys nose.main(defaultTest=sys.argv[0]) From 96c30f98bbb628e13aaa32c9c392584b0fbf8788 Mon Sep 17 00:00:00 2001 From: Yves-Marie Haussonne Date: Mon, 17 Nov 2014 02:36:04 +0100 Subject: [PATCH 3/3] Complement pull request #426 https://github.com/RDFLib/rdflib/pull/426. Correct failing tests. 
Add correct case for prefix escape in unicode expansion in literals --- rdflib/plugins/parsers/notation3.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rdflib/plugins/parsers/notation3.py b/rdflib/plugins/parsers/notation3.py index 2fa4fba5f..b9821d362 100755 --- a/rdflib/plugins/parsers/notation3.py +++ b/rdflib/plugins/parsers/notation3.py @@ -1581,23 +1581,23 @@ def strconst(self, argstr, i, delim): self.BadSyntax(argstr, i, "unterminated string literal") - def _unicodeEscape(self, argstr, i, startline, reg, n): + def _unicodeEscape(self, argstr, i, startline, reg, n, prefix): if len(argstr)