Skip to content

Commit

Permalink
Merge pull request #16 from OnroerendErfgoed/FIX_0.1.1
Browse files Browse the repository at this point in the history
Fix 0.1.1
  • Loading branch information
goessebr authored Sep 11, 2019
2 parents d598e9e + 1959cf7 commit 4e9333c
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 16 deletions.
7 changes: 7 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
0.1.1 (11-09-2019)
------------------

- Samenvattende adresnotatie werkt nog niet bij overlappende ranges van huisnummers (#9)
- Unicode error bij aanmaken adressamenvatting (#11)
- Problemen indien bisnummer meer dan 1 karakter bevat (#12)

0.1.0 (06-08-2019)
------------------

Expand Down
28 changes: 21 additions & 7 deletions housenumparser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import logging

from housenumparser import merger
from housenumparser import reader
from housenumparser.element import ReadException

LOG = logging.getLogger(__name__)


def split(data, step=None, on_exc=ReadException.Action.ERROR_MSG):
"""
Expand All @@ -22,11 +28,15 @@ def split(data, step=None, on_exc=ReadException.Action.ERROR_MSG):
:returns: A list of :class:`.element.SingleElement`
"""
if isinstance(data, list):
numbers = reader.read_iterable(data, step=step, on_exc=on_exc)
else:
numbers = reader.read_data(data, step=step, on_exc=on_exc)
return [item for number in numbers for item in number.split()]
try:
if isinstance(data, list):
numbers = reader.read_iterable(data, step=step, on_exc=on_exc)
else:
numbers = reader.read_data(data, step=step, on_exc=on_exc)
return [item for number in numbers for item in number.split()]
except Exception: # noqa
LOG.error("Could not split data: {}".format(data))
raise


def merge(data, on_exc=ReadException.Action.ERROR_MSG):
Expand All @@ -42,5 +52,9 @@ def merge(data, on_exc=ReadException.Action.ERROR_MSG):
:returns: A list of :class:`.element.Element`
"""
numbers = split(data, on_exc=on_exc)
return merger.merge_data(merger.group(numbers))
try:
numbers = split(data, on_exc=on_exc)
return merger.merge_data(merger.group(numbers))
except Exception: # noqa
LOG.error("Could not merge data: {}".format(data))
raise
8 changes: 6 additions & 2 deletions housenumparser/element.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals


import re
from builtins import str
from enum import Enum


Expand Down Expand Up @@ -409,7 +413,7 @@ class BusLetter(SingleElement):
eg: "3 bus A" or "53 bus D"
"""
sequence_class = BusLetterSequence
regex = re.compile(r'^(\d+)bus([a-zA-Z]+)$')
regex = re.compile(r'^(\d+)bus([a-zA-Z])$')

def __init__(self, house_number, bus_letter):
"""
Expand Down Expand Up @@ -438,7 +442,7 @@ class BisLetter(SingleElement):
eg: "3A" or "53D"
"""
sequence_class = BisLetterSequence
regex = re.compile(r'^(\d+)[/_]?([a-zA-Z]+)$')
regex = re.compile(r'^(\d+)[/_]?([a-zA-Z])$')

def __init__(self, house_number, bis_letter):
"""
Expand Down
18 changes: 16 additions & 2 deletions housenumparser/merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"""
Module which takes a string of house numbers and turns them into series.
"""
from __future__ import unicode_literals

import collections

from housenumparser.element import BisLetter
Expand All @@ -14,6 +16,7 @@
from housenumparser.element import BusNumberSequence
from housenumparser.element import HouseNumber
from housenumparser.element import HouseNumberSequence
from housenumparser.element import ReadException


def group(data):
Expand All @@ -31,7 +34,8 @@ def group(data):
'bis_numbers': [],
'bis_letters': [],
'bus_numbers': [],
'bus_letters': []
'bus_letters': [],
'bad_data': [],
}
for x in data:
if isinstance(x, HouseNumber):
Expand All @@ -44,16 +48,21 @@ def group(data):
result['bus_numbers'].append(x)
elif isinstance(x, BusLetter):
result['bus_letters'].append(x)
elif isinstance(x, ReadException):
result['bad_data'].append(x)
return result


def merge_data(data):
def merge_data(data, on_exc=ReadException.Action.ERROR_MSG):
"""
Merges single elements into sequences where possible.
:type data: dict[str, list[.element.SingleElement]]
:param data: data as returned by the `group` function
:type on_exc: :class:`.element.ReadException.Action`
:param on_exc: Flag on how to treat incorrect data. Default ERROR_MSG.
:returns: A list of :class:`.element.SingleElement` and if possible
:class:`.element.SequenceElement`.
"""
Expand Down Expand Up @@ -112,6 +121,10 @@ def merge_data(data):
chr(last)),
(1,))
)
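# With RAISE an exception would have been raised before reaching this
# point, and DROP needs no action; only ERROR_MSG and KEEP_ORIGINAL
# require the bad data to be added to the result.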
if on_exc in (ReadException.Action.ERROR_MSG,
ReadException.Action.KEEP_ORIGINAL):
merged_data.extend(data['bad_data'])
return merged_data


Expand Down Expand Up @@ -145,6 +158,7 @@ def merge_numbers(data, single_result, sequence_result, allowed_steps):
:returns: List of :class:`.element.Element`, using Sequences if possible.
"""
data = list(set(data))
data.sort()
total_len = len(data)
data = data + [-1, -1]
Expand Down
8 changes: 6 additions & 2 deletions housenumparser/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
- "23", "24 bus 2" -> [<HouseNumber> "23", <BusNumber> "24 bus 2"]
- "25-27" -> [<HouseNumberSequence> "25-26", <HouseNumber> "27"]
"""
from __future__ import unicode_literals

import re
from builtins import str

from housenumparser.element import BisLetter
from housenumparser.element import BisLetterSequence
Expand Down Expand Up @@ -41,7 +44,8 @@ def read_data(data, step=None, on_exc=ReadException.Action.ERROR_MSG):
:returns: A list of the elements read from the data.
"""
return read_iterable(str(data).split(","), step=step, on_exc=on_exc)
return read_iterable(str(data).split(","), step=step,
on_exc=on_exc)


def read_iterable(inputs, step=None, on_exc=ReadException.Action.ERROR_MSG):
Expand All @@ -64,7 +68,7 @@ def read_iterable(inputs, step=None, on_exc=ReadException.Action.ERROR_MSG):
"""
result = []
for data in inputs:
data = data.strip()
data = data.strip() if data else str(data)
parsed_element = read_element(data, step=step, on_exc=on_exc)
if parsed_element is not None:
result.append(parsed_element)
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
enum34==1.1.6;python_version < '3.4'
future==0.17.1;python_version < '3.4'
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

setup(
name='housenumparser',
version='0.1.0',
version='0.1.1',
description='housenum_be_r_parser',
long_description=README,
author='Onroerend Erfgoed',
Expand Down
65 changes: 65 additions & 0 deletions tests/test_merge.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from builtins import str

import pytest

import housenumparser
from housenumparser.element import ReadException


def test_all_forms():
Expand All @@ -21,6 +28,22 @@ def test_all_forms():
assert '33 bus A-C' in house_numbers


def test_none():
    """
    ``None`` input, either as an item of a list or as the whole input, shows
    up as 'None' in the merge result when `on_exc` is KEEP_ORIGINAL.
    """
    keep_original = ReadException.Action.KEEP_ORIGINAL

    # None mixed with a valid house number.
    merged = housenumparser.merge([None, '1'], on_exc=keep_original)
    rendered = [str(element) for element in merged]
    assert 'None' in rendered
    assert '1' in rendered

    # None as the complete input.
    merged = housenumparser.merge(None, on_exc=keep_original)
    rendered = [str(element) for element in merged]
    assert 'None' in rendered


def test_house_number_sequences():
label = ('32, 34, 36, 38, 25, 27, 29, 31, 39, 40, 41, 42, 43, 44, 46, '
'47, 48, 49, 50, 52, 54')
Expand Down Expand Up @@ -75,3 +98,45 @@ def test_house_number_no_sequences():
house_numbers = [str(house_number) for house_number in house_numbers]
assert '32' in house_numbers
assert '37' in house_numbers


def test_overlapping_ranges():
    """
    Merging '1-11, 5-9' yields a single element rendered as '1-11': the
    range contained in the other one does not produce a separate result.
    """
    merged = housenumparser.merge('1-11, 5-9')
    assert isinstance(merged, list)
    assert len(merged) == 1
    assert str(merged[0]) == '1-11'


def test_special_characters():
"""
While we can't parse special characters into house numbers, we still
should handle this in a proper way rather than just crashing.
The 'proper way' is dependent on the `on_exc` parameter.
"""
label = u'1ëâB, 1-11'
house_numbers = housenumparser.merge(label,
on_exc=ReadException.Action.DROP)
assert isinstance(house_numbers, list)
assert 1 == len(house_numbers)
assert '1-11' == str(house_numbers[0])

with pytest.raises(ValueError) as e:
housenumparser.merge(label, on_exc=ReadException.Action.RAISE)
assert 'Could not parse/understand: 1ëâB' == str(e.value)

house_numbers = housenumparser.merge(
label, on_exc=ReadException.Action.KEEP_ORIGINAL
)
assert isinstance(house_numbers, list)
assert 2 == len(house_numbers)
assert '1-11' == str(house_numbers[0])
assert '1ëâB' == str(house_numbers[1])

house_numbers = housenumparser.merge(label,
on_exc=ReadException.Action.ERROR_MSG)
assert isinstance(house_numbers, list)
assert 2 == len(house_numbers)
assert '1-11' == str(house_numbers[0])
assert 'Could not parse/understand: 1ëâB' == str(house_numbers[1])
28 changes: 26 additions & 2 deletions tests/test_split.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from builtins import str

import pytest

import housenumparser
Expand Down Expand Up @@ -40,6 +44,16 @@ def test_single_bis_letter():
assert str(hnr) == '25A'


def test_double_bis_letter():
    """
    A bis letter of more than one character ('25AB') is not parsed: split
    returns a single ReadException carrying the error message.
    """
    elements = housenumparser.split('25AB')
    assert isinstance(elements, list)
    assert len(elements) == 1
    only_element = elements[0]
    assert isinstance(only_element, ReadException)
    assert str(only_element) == 'Could not parse/understand: 25AB'


def test_single_bis_number():
label = '25/1'
house_numbers = housenumparser.split(label)
Expand Down Expand Up @@ -80,6 +94,16 @@ def test_bus_letter():
assert str(hnr) == '25 bus A'


def test_double_bus_letter():
    """
    A bus letter of more than one character ('25 bus AB') is not parsed:
    split returns a single ReadException carrying the error message.
    """
    elements = housenumparser.split('25 bus AB')
    assert isinstance(elements, list)
    assert len(elements) == 1
    only_element = elements[0]
    assert isinstance(only_element, ReadException)
    assert str(only_element) == 'Could not parse/understand: 25 bus AB'


def test_house_number_sequence():
label = '25,27,29,31'
house_numbers = housenumparser.split(label)
Expand Down Expand Up @@ -344,5 +368,5 @@ def test_split_wrong_bus_letter_range():


def test_wrong_on_exc():
element = ReadException('error_msg', on_exc='invalid')
assert '<ReadException> Not implemented on_exc: invalid' == str(element)
element = ReadException('error_msg', on_exc='ïnvalid')
assert '<ReadException> Not implemented on_exc: ïnvalid' == str(element)

0 comments on commit 4e9333c

Please sign in to comment.