diff --git a/rdflib/term.py b/rdflib/term.py index 37c0e84a9..37fa4ac7a 100644 --- a/rdflib/term.py +++ b/rdflib/term.py @@ -50,7 +50,6 @@ from isodate import parse_time, parse_date, parse_datetime - try: from hashlib import md5 assert md5 @@ -63,7 +62,6 @@ from rdflib.compat import numeric_greater - b = py3compat.b skolem_genid = "/.well-known/genid/" @@ -83,6 +81,24 @@ def _is_valid_uri(uri): def _is_valid_langtag(tag): return bool(_lang_tag_regex.match(tag)) +def _is_valid_unicode(value): + """ + Verify that the provided value can be converted into a Python + unicode object. + """ + if isinstance(value, bytes): + coding_func, param = getattr(value, 'decode'), 'utf-8' + elif py3compat.PY3: + coding_func, param = str, value + else: + coding_func, param = unicode, value + + # try to convert value into unicode + try: + coding_func(param) + except UnicodeError: + return False + return True class Node(object): """ @@ -571,7 +587,7 @@ def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None): if value is not None and normalize: _value, _datatype = _castPythonToLiteral(value) - if _value is not None: + if _value is not None and _is_valid_unicode(_value): lexical_or_value = _value else: @@ -585,7 +601,6 @@ def __new__(cls, lexical_or_value, lang=None, datatype=None, normalize=None): if datatype: lang = None - if py3compat.PY3 and isinstance(lexical_or_value, bytes): lexical_or_value = lexical_or_value.decode('utf-8') @@ -1495,8 +1510,7 @@ def _castPythonToLiteral(obj): URIRef(_XSD_PFX + 'unsignedByte'): int, URIRef(_XSD_PFX + 'float'): float, URIRef(_XSD_PFX + 'double'): float, - URIRef( - _XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(py3compat.b(s)), + URIRef(_XSD_PFX + 'base64Binary'): lambda s: base64.b64decode(s), URIRef(_XSD_PFX + 'anyURI'): None, _RDF_XMLLITERAL: _parseXML, _RDF_HTMLLITERAL: _parseHTML diff --git a/test/test_term.py b/test/test_term.py index fdbea4d53..78fa7a034 100644 --- a/test/test_term.py +++ b/test/test_term.py @@ -3,9 +3,12 @@ """ import unittest +import base64 + from rdflib.py3compat import format_doctest_out as uformat -from rdflib.term import URIRef, BNode +from rdflib.term import URIRef, BNode, Literal, _is_valid_unicode from rdflib.graph import QuotedGraph, Graph +from rdflib.namespace import XSD class TestURIRefRepr(unittest.TestCase): """ @@ -25,15 +28,38 @@ def testGracefulOrdering(self): a = u>BNode() a = u>QuotedGraph(g.store, u) a = u>g - - - - + class TestBNodeRepr(unittest.TestCase): - + def testSubclassNameAppearsInRepr(self): class MyBNode(BNode): pass x = MyBNode() self.assertTrue(repr(x).startswith("MyBNode(")) + + +class TestLiteral(unittest.TestCase): + + def test_base64_values(self): + b64msg = 'cmRmbGliIGlzIGNvb2whIGFsc28gaGVyZSdzIHNvbWUgYmluYXJ5IAAR83UC' + decoded_b64msg = base64.b64decode(b64msg) + lit = Literal(b64msg, datatype=XSD.base64Binary) + self.assertEqual(lit.value, decoded_b64msg) + self.assertEqual(str(lit), b64msg) + + +class TestValidityFunctions(unittest.TestCase): + + def test_is_valid_unicode(self): + testcase_list = ( + (None, True), + (1, True), + (['foo'], True), + ({'foo': b'bar'}, True), + ('foo', True), + (b'foo\x00', True), + (b'foo\xf3\x02', False) + ) + for val, expected in testcase_list: + self.assertEqual(_is_valid_unicode(val), expected)