From 382d04bad4d59b3a9204c1b42f2c4751c86f1f82 Mon Sep 17 00:00:00 2001 From: Nipun Sadvilkar Date: Fri, 29 May 2020 22:37:42 +0530 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20=20Limit=20NUMBERED=5FREFERENCE?= =?UTF-8?q?=5FREGEX=20for=20zero=20or=20one=20time?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #58 --- pysbd/lang/common/numbers.py | 2 +- tests/regression/test_issues.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pysbd/lang/common/numbers.py b/pysbd/lang/common/numbers.py index ff3295e..5063c57 100644 --- a/pysbd/lang/common/numbers.py +++ b/pysbd/lang/common/numbers.py @@ -26,7 +26,7 @@ class Common(object): # https://rubular.com/r/UkumQaILKbkeyc # https://github.com/diasks2/pragmatic_segmenter/commit/d9ec1a352aff92b91e2e572c30bb9561eb42c703 - NUMBERED_REFERENCE_REGEX = r'(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)*\b\d{1,3}\])+|((\d{1,3}\s?)*\d{1,3}))(\s)(?=[A-Z])' + NUMBERED_REFERENCE_REGEX = r'(?<=[^\d\s])(\.|∯)((\[(\d{1,3},?\s?-?\s?)*\b\d{1,3}\])+|((\d{1,3}\s?)?\d{1,3}))(\s)(?=[A-Z])' # # Rubular: http://rubular.com/r/yqa4Rit8EY PossessiveAbbreviationRule = Rule(r"\.(?='s\s)|\.(?='s$)|\.(?='s\Z)", '∯') diff --git a/tests/regression/test_issues.py b/tests/regression/test_issues.py index cc1f989..3ce421e 100644 --- a/tests/regression/test_issues.py +++ b/tests/regression/test_issues.py @@ -26,7 +26,9 @@ ['As an example of a different special-purpose mechanism, we have introduced a methodology for letting donors make their donations to charities conditional on donations by other donors (who, in turn, can make their donations conditional) [70].', 'We have used this mechanism to collect money for Indian Ocean Tsunami and Hurricane Katrina victims.', "We have also introduced a more general framework for negotiation when one agent's actions have a direct effect (externality) on the other agents' utilities [69].", 'Both the charities and externalities methodologies require the solution of NP-hard optimization problems in general, but there are some natural tractable cases as well as effective MIP formulations.', 'Recently, Ghosh and Mahdian [86] at Yahoo! Research extended our charities work, and based on this a web-based system for charitable donations was built at Yahoo!']), ('#39', "T stands for the vector transposition. As shown in Fig. ??", ["T stands for the vector transposition.", "As shown in Fig. ??"]), - ('#39', 'Fig. ??', ['Fig. ??']) + ('#39', 'Fig. ??', ['Fig. ??']), + ('#58', 'Rok bud.2027777983834843834843042003200220012000199919981997199619951994199319921991199019891988198042003200220012000199919981997199619951994199319921991199019891988198', + ['Rok bud.2027777983834843834843042003200220012000199919981997199619951994199319921991199019891988198042003200220012000199919981997199619951994199319921991199019891988198']) ] TEST_ISSUE_DATA_CHAR_SPANS = [