diff --git a/rdflib/plugins/sparql/parser.py b/rdflib/plugins/sparql/parser.py index ba19082de..db8c41440 100644 --- a/rdflib/plugins/sparql/parser.py +++ b/rdflib/plugins/sparql/parser.py @@ -17,7 +17,7 @@ from parserutils import Comp, Param, ParamList from . import operators as op -from rdflib.py3compat import decodeStringEscape +from rdflib.py3compat import decodeUnicodeEscape, bytestype import rdflib @@ -287,14 +287,14 @@ def _hexExpand(match): # ) + ZeroOrMore( ~ Literal("'\\") | ECHAR ) ) + "'''" STRING_LITERAL_LONG1 = Regex(ur"'''((?:'|'')?(?:[^'\\]|\\['ntbrf\\]))*'''") STRING_LITERAL_LONG1.setParseAction( - lambda x: rdflib.Literal(decodeStringEscape(x[0][3:-3]))) + lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][3:-3]))) # [159] STRING_LITERAL_LONG2 ::= '"""' ( ( '"' | '""' )? ( [^"\] | ECHAR ) )* '"""' # STRING_LITERAL_LONG2 = Literal('"""') + ( Optional( Literal('"') | '""' # ) + ZeroOrMore( ~ Literal('"\\') | ECHAR ) ) + '"""' STRING_LITERAL_LONG2 = Regex(ur'"""(?:(?:"|"")?(?:[^"\\]|\\["ntbrf\\]))*"""') STRING_LITERAL_LONG2.setParseAction( - lambda x: rdflib.Literal(decodeStringEscape(x[0][3:-3]))) + lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][3:-3]))) # [156] STRING_LITERAL1 ::= "'" ( ([^#x27#x5C#xA#xD]) | ECHAR )* "'" # STRING_LITERAL1 = Literal("'") + ZeroOrMore( @@ -303,7 +303,7 @@ def _hexExpand(match): STRING_LITERAL1 = Regex( ur"'(?:[^'\n\r\\]|\\['ntbrf\\])*'(?!')", flags=re.U) STRING_LITERAL1.setParseAction( - lambda x: rdflib.Literal(decodeStringEscape(x[0][1:-1]))) + lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][1:-1]))) # [157] STRING_LITERAL2 ::= '"' ( ([^#x22#x5C#xA#xD]) | ECHAR )* '"' # STRING_LITERAL2 = Literal('"') + ZeroOrMore ( @@ -312,7 +312,7 @@ def _hexExpand(match): STRING_LITERAL2 = Regex( ur'"(?:[^"\n\r\\]|\\["ntbrf\\])*"(?!")', flags=re.U) STRING_LITERAL2.setParseAction( - lambda x: rdflib.Literal(decodeStringEscape(x[0][1:-1]))) + lambda x: rdflib.Literal(decodeUnicodeEscape(x[0][1:-1]))) # [161] NIL ::= '(' WS* ')' NIL = Literal('(') + ')' @@ -1045,6 +1045,9 @@ def expand(m): def parseQuery(q): if hasattr(q, 'read'): q = q.read() + if isinstance(q, bytestype): + q = q.decode('utf-8') + q = expandUnicodeEscapes(q) return Query.parseString(q, parseAll=True) @@ -1052,6 +1055,10 @@ def parseQuery(q): def parseUpdate(q): if hasattr(q, 'read'): q = q.read() + + if isinstance(q, bytestype): + q = q.decode('utf-8') + q = expandUnicodeEscapes(q) return UpdateUnit.parseString(q, parseAll=True)[0] diff --git a/rdflib/plugins/sparql/results/csvresults.py b/rdflib/plugins/sparql/results/csvresults.py index 61f66a737..88c95c30e 100644 --- a/rdflib/plugins/sparql/results/csvresults.py +++ b/rdflib/plugins/sparql/results/csvresults.py @@ -7,7 +7,7 @@ """ - +import codecs import csv from rdflib import Variable, BNode, URIRef, Literal, py3compat @@ -23,6 +23,9 @@ def parse(self, source): r = Result('SELECT') + if hasattr(source, 'mode') and 'b' in source.mode: + source = codecs.getreader('utf-8')(source) + reader = csv.reader(source, delimiter=self.delim) r.vars = [Variable(x) for x in reader.next()] r.bindings = [] diff --git a/rdflib/plugins/sparql/results/jsonresults.py b/rdflib/plugins/sparql/results/jsonresults.py index 8c807fd3a..aca8a0f40 100644 --- a/rdflib/plugins/sparql/results/jsonresults.py +++ b/rdflib/plugins/sparql/results/jsonresults.py @@ -2,6 +2,9 @@ Result, ResultException, ResultSerializer, ResultParser) from rdflib import Literal, URIRef, BNode, Variable +from rdflib.py3compat import bytestype + + import jsonlayer """A Serializer for SPARQL results in JSON: @@ -19,7 +22,10 @@ class JSONResultParser(ResultParser): def parse(self, source): - return JSONResult(jsonlayer.decode(source.read())) + inp = source.read() + if isinstance(inp, bytestype): + inp = inp.decode('utf-8') + return JSONResult(jsonlayer.decode(inp)) class JSONResultSerializer(ResultSerializer): diff --git a/rdflib/plugins/sparql/results/tsvresults.py b/rdflib/plugins/sparql/results/tsvresults.py index f25eb81bf..b85f61b85 100644 --- a/rdflib/plugins/sparql/results/tsvresults.py +++ b/rdflib/plugins/sparql/results/tsvresults.py @@ -5,6 +5,8 @@ It is implemented with pyparsing, reusing the elements from the SPARQL Parser """ +import codecs + from pyparsing import ( Optional, ZeroOrMore, Literal, ParserElement, ParseException, Suppress) @@ -38,6 +40,9 @@ class TSVResultParser(ResultParser): def parse(self, source): + if hasattr(source, 'mode') and 'b' in source.mode: + source = codecs.getreader('utf-8')(source) + try: r = Result('SELECT') diff --git a/rdflib/py3compat.py b/rdflib/py3compat.py index dbbf85010..9324e5c69 100644 --- a/rdflib/py3compat.py +++ b/rdflib/py3compat.py @@ -52,6 +52,9 @@ def b(s): def ascii(stream): return codecs.getreader('ascii')(stream) + def bopen(*args, **kwargs): + return open(*args, mode = 'rb', **kwargs) + bytestype = bytes # Abstract u'abc' syntax: @@ -102,6 +105,8 @@ def b(s): def ascii(stream): return stream + bopen = open + bytestype = str # Abstract u'abc' syntax: diff --git a/test/DAWG/rdflib/unicode.ttl b/test/DAWG/rdflib/unicode.ttl index 281ac1679..ee9714113 100644 --- a/test/DAWG/rdflib/unicode.ttl +++ b/test/DAWG/rdflib/unicode.ttl @@ -1 +1 @@ - "孫子兵法" . \ No newline at end of file + "\u5b6b\u5b50\u5175\u6cd5" . \ No newline at end of file diff --git a/test/test_dawg.py b/test/test_dawg.py index 00dc718ee..84aac0f8a 100644 --- a/test/test_dawg.py +++ b/test/test_dawg.py @@ -41,7 +41,7 @@ def most_common(self, N): from rdflib.plugins.sparql.results.rdfresults import RDFResultParser from rdflib.plugins.sparql.update import evalUpdate -from rdflib.py3compat import decodeStringEscape +from rdflib.py3compat import decodeStringEscape, bopen from nose.tools import nottest, eq_ from nose import SkipTest @@ -217,10 +217,10 @@ def update_test(t): if not res: if syntax: - translateUpdate(parseUpdate(open(query[7:]))) + translateUpdate(parseUpdate(bopen(query[7:]))) else: try: - translateUpdate(parseUpdate(open(query[7:]))) + translateUpdate(parseUpdate(bopen(query[7:]))) raise AssertionError("Query shouldn't have parsed!") except: pass # negative syntax test @@ -236,7 +236,7 @@ def update_test(t): for x, l in graphdata: g.load(x, publicID=URIRef(l), format=_fmt(x)) - req = translateUpdate(parseUpdate(open(query[7:]))) + req = translateUpdate(parseUpdate(bopen(query[7:]))) evalUpdate(g, req) # read expected results @@ -284,33 +284,33 @@ def update_test(t): if data: print "----------------- DATA --------------------" print ">>>", data - print open(data[7:]).read() + print bopen(data[7:]).read() if graphdata: print "----------------- GRAPHDATA --------------------" for x, l in graphdata: print ">>>", x, l - print open(x[7:]).read() + print bopen(x[7:]).read() print "----------------- Request -------------------" print ">>>", query - print open(query[7:]).read() + print bopen(query[7:]).read() if res: if resdata: print "----------------- RES DATA --------------------" print ">>>", resdata - print open(resdata[7:]).read() + print bopen(resdata[7:]).read() if resgraphdata: print "----------------- RES GRAPHDATA -------------------" for x, l in resgraphdata: print ">>>", x, l - print open(x[7:]).read() + print bopen(x[7:]).read() print "------------- MY RESULT ----------" print g.serialize(format='trig') try: - pq = translateUpdate(parseUpdate(open(query[7:]).read())) + pq = translateUpdate(parseUpdate(bopen(query[7:]).read())) print "----------------- Parsed ------------------" pprintAlgebra(pq) # print pq @@ -336,7 +336,7 @@ def query_test(t): def skip(reason='(none)'): print "Skipping %s from now on." % uri - f = open("skiptests.list", "a") + f = bopen("skiptests.list", "a") f.write("%s\t%s\n" % (uri, reason)) f.close() @@ -354,12 +354,12 @@ def skip(reason='(none)'): if syntax: translateQuery(parseQuery( - open(query[7:]).read()), base=urljoin(query, '.')) + bopen(query[7:]).read()), base=urljoin(query, '.')) else: # negative syntax test try: translateQuery(parseQuery( - open(query[7:]).read()), base=urljoin(query, '.')) + bopen(query[7:]).read()), base=urljoin(query, '.')) assert False, 'Query should not have parsed!' except: @@ -367,7 +367,7 @@ def skip(reason='(none)'): return # eval test - carry out query - res2 = g.query(open(query[7:]).read(), base=urljoin(query, '.')) + res2 = g.query(bopen(query[7:]).read(), base=urljoin(query, '.')) if resfile.endswith('ttl'): resg = Graph() @@ -378,12 +378,12 @@ def skip(reason='(none)'): resg.load(resfile, publicID=resfile) res = RDFResultParser().parse(resg) elif resfile.endswith('srj'): - res = Result.parse(open(resfile[7:]), format='json') + res = Result.parse(bopen(resfile[7:]), format='json') elif resfile.endswith('tsv'): - res = Result.parse(open(resfile[7:]), format='tsv') + res = Result.parse(bopen(resfile[7:]), format='tsv') elif resfile.endswith('csv'): - res = Result.parse(open(resfile[7:]), format='csv') + res = Result.parse(bopen(resfile[7:]), format='csv') # CSV is lossy, round-trip our own resultset to # lose the same info :) @@ -396,7 +396,7 @@ def skip(reason='(none)'): res2 = Result.parse(s, format='csv') else: - res = Result.parse(open(resfile[7:]), format='xml') + res = Result.parse(bopen(resfile[7:]), format='xml') if not DETAILEDASSERT: eq(res.type, res2.type, 'Types do not match') @@ -461,23 +461,23 @@ def skip(reason='(none)'): if data: print "----------------- DATA --------------------" print ">>>", data - print open(data[7:]).read() + print bopen(data[7:]).read() if graphdata: print "----------------- GRAPHDATA --------------------" for x in graphdata: print ">>>", x - print open(x[7:]).read() + print bopen(x[7:]).read() print "----------------- Query -------------------" print ">>>", query - print open(query[7:]).read() + print bopen(query[7:]).read() if resfile: print "----------------- Res -------------------" print ">>>", resfile - print open(resfile[7:]).read() + print bopen(resfile[7:]).read() try: - pq = parseQuery(open(query[7:]).read()) + pq = parseQuery(bopen(query[7:]).read()) print "----------------- Parsed ------------------" pprintAlgebra(translateQuery(pq, base=urljoin(query, '.'))) except: