From fcab6bfa179cb0768ec7107e945298bac0236be8 Mon Sep 17 00:00:00 2001 From: Hussein Date: Tue, 4 Feb 2020 23:19:21 -0600 Subject: [PATCH 1/3] Porting to Python 3 --- stanford_corenlp_pywrapper/__init__.py | 2 +- stanford_corenlp_pywrapper/sockwrap.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/stanford_corenlp_pywrapper/__init__.py b/stanford_corenlp_pywrapper/__init__.py index c9e6e96..eace788 100644 --- a/stanford_corenlp_pywrapper/__init__.py +++ b/stanford_corenlp_pywrapper/__init__.py @@ -1 +1 @@ -from sockwrap import * +from .sockwrap import * diff --git a/stanford_corenlp_pywrapper/sockwrap.py b/stanford_corenlp_pywrapper/sockwrap.py index c575693..da50a45 100644 --- a/stanford_corenlp_pywrapper/sockwrap.py +++ b/stanford_corenlp_pywrapper/sockwrap.py @@ -74,7 +74,7 @@ def command(mode=None, configfile=None, configdict=None, comm_mode=None, else: assert False, "need comm_mode to be SOCKET or PIPE but got " + repr(comm_mode) - cmd = """exec {java_command} {java_options} -cp '{classpath}' + cmd = """exec {java_command} {java_options} -cp '{classpath}' corenlp.SocketServer {comm_info} {more_config}""" return cmd.format(**d).replace("\n", " ") @@ -85,7 +85,7 @@ class SubprocessCrashed(Exception): class CoreNLP: - def __init__(self, mode=None, + def __init__(self, mode=None, configfile=None, configdict=None, corenlp_jars=( "/home/sw/corenlp/stanford-corenlp-full-2015-04-20/*", @@ -168,7 +168,7 @@ def start_server(self): if self.comm_mode=='PIPE': if not os.path.exists(self.outpipe): os.mkfifo(self.outpipe) - + cmd = command(**self.__dict__) LOG.info("Starting java subprocess, and waiting for signal it's ready, with command: %s" % cmd) self.proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) @@ -191,7 +191,7 @@ def start_server(self): assert ret == "PONG", "Bad return data on startup ping: " + ret LOG.info("Successful ping. The server has started.") break - except socket.error, e: + except socket.error as e: LOG.info("Waiting for startup: ping got exception: %s %s" % (type(e), e)) LOG.info("pausing before retry") time.sleep(STARTUP_BUSY_WAIT_INTERVAL_SEC) @@ -251,7 +251,7 @@ def send_command_and_parse_result(self, cmd, timeout, raw=False): LOG.warning("Bad JSON length %d, starts with: %s" % (len(data), repr(data[:1000]))) return None return decoded - except socket.timeout, e: + except socket.timeout as e: LOG.info("Socket timeout happened, returning None: %s %s" % (type(e), e)) return None # This is tricky. maybe the process is running smoothly but just @@ -329,7 +329,7 @@ def test_paths(): def assert_no_java(msg=""): ps_output = os.popen("ps wux").readlines() javalines = [x for x in ps_output if re.search(r'\bbin/java\b', x)] - print ''.join(javalines) + print (''.join(javalines)) assert len(javalines) == 0, msg # def test_doctimeout(): @@ -345,8 +345,8 @@ def assert_no_java(msg=""): import sys if sys.argv[1]=='modes': for mode,d in MODES_items: - print " * `%s`: %s" % (mode, d['description']) + print (" * `%s`: %s" % (mode, d['description'])) if sys.argv[1]=='modes_json': # import json as stdjson # print stdjson.dumps(MODES, indent=4) - print '"%s"' % json.dumps(MODES).replace('"', r'\"') + print ('"%s"' % json.dumps(MODES).replace('"', r'\"')) From bcd87aa64b394ece7229466979f3fb28a9856ec5 Mon Sep 17 00:00:00 2001 From: Hussein Date: Wed, 5 Feb 2020 00:54:09 -0600 Subject: [PATCH 2/3] Few edits for porting to Python 3, strings to bytes for console out --- stanford_corenlp_pywrapper/sockwrap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stanford_corenlp_pywrapper/sockwrap.py b/stanford_corenlp_pywrapper/sockwrap.py index da50a45..0eb67ac 100644 --- a/stanford_corenlp_pywrapper/sockwrap.py +++ b/stanford_corenlp_pywrapper/sockwrap.py @@ -266,13 +266,13 @@ def send_command_and_get_string_result(self, cmd, timeout): sock.sendall(cmd + "\n") size_info_str = sock.recv(8) elif self.comm_mode == 'PIPE': - self.proc.stdin.write(cmd + "\n") + self.proc.stdin.write((cmd+"\n").encode('utf-8')) self.proc.stdin.flush() size_info_str = self.outpipe_fp.read(8) # java "long" is 8 bytes, which python struct calls "long long". # java default byte ordering is big-endian. - size_info = struct.unpack('>Q', size_info_str)[0] + size_info = struct.unpack(('>Q').encode('utf-8'), (size_info_str).encode('utf-8'))[0] # print "size expected", size_info chunks = [] From d24d110d5fae850b943365883bfb0764349d5754 Mon Sep 17 00:00:00 2001 From: Hussein Al-Olimat Date: Wed, 5 Feb 2020 14:11:08 -0600 Subject: [PATCH 3/3] Handling string encodings --- stanford_corenlp_pywrapper/sockwrap.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/stanford_corenlp_pywrapper/sockwrap.py b/stanford_corenlp_pywrapper/sockwrap.py index 0eb67ac..a2cad24 100644 --- a/stanford_corenlp_pywrapper/sockwrap.py +++ b/stanford_corenlp_pywrapper/sockwrap.py @@ -178,7 +178,7 @@ def start_server(self): sock = self.get_socket(num_retries=100, retry_interval=STARTUP_BUSY_WAIT_INTERVAL_SEC) sock.close() elif self.comm_mode=='PIPE': - self.outpipe_fp = open(self.outpipe, 'r') + self.outpipe_fp = open(self.outpipe, 'rb') while True: # This loop is for if you have timeouts for the socket connection @@ -272,8 +272,7 @@ def send_command_and_get_string_result(self, cmd, timeout): # java "long" is 8 bytes, which python struct calls "long long". # java default byte ordering is big-endian. - size_info = struct.unpack(('>Q').encode('utf-8'), (size_info_str).encode('utf-8'))[0] - # print "size expected", size_info + size_info = struct.unpack(('>Q').encode('utf-8'), size_info_str)[0] chunks = [] curlen = lambda: sum(len(x) for x in chunks) @@ -283,7 +282,7 @@ def send_command_and_get_string_result(self, cmd, timeout): data = sock.recv(remaining_size) elif self.comm_mode == 'PIPE': data = self.outpipe_fp.read(remaining_size) - chunks.append(data) + chunks.append(data.decode("utf-8")) if curlen() >= size_info: break if len(chunks) > 1000: LOG.warning("Incomplete value from server")