This repository has been archived by the owner on Jan 8, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathrules.yaml
148 lines (118 loc) · 5.28 KB
/
rules.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Address Parsing Rules
#
# Catalogue of the address parts known to the parser. Every `id` value is also
# declared as a YAML anchor of the same name (`&name name`) so that other
# sections of this file can refer to a part by alias (`*name`) instead of
# repeating the string.
address_parts:
  # Unfiltered set of address parts. Currently maps directly to usaddress parts.
  # Fields of each entry:
  #   id          - identifier of the part (anchored under the same name)
  #   usaddress   - the usaddress label this part corresponds to
  #   description - human-readable explanation with examples
  standard:
    - id: &address_number address_number
      usaddress: AddressNumber
      description: 'The address number, e.g. "1234" Main St.'
    - id: &address_number_prefix address_number_prefix
      usaddress: AddressNumberPrefix
      description: 'A modifier before an address number, e.g. "Mile", "#"'
    - id: &address_number_suffix address_number_suffix
      usaddress: AddressNumberSuffix
      description: 'A modifier after an address number, e.g "B", "1/2"'
    - id: &building_name building_name
      usaddress: BuildingName
      description: 'The name of a building, e.g. "Atlanta Financial Center"'
    - id: &corner_of corner_of
      usaddress: CornerOf
      description: 'Words indicating that an address is a corner, e.g. "Junction", "corner of"'
    - id: &intersection_separator intersection_separator
      usaddress: IntersectionSeparator
      description: 'A conjunction connecting parts of an intersection, e.g. "and", "&"'
    - id: &landmark_name landmark_name
      usaddress: LandmarkName
      description: 'The name of a landmark, e.g. "Wrigley Field", "Union Station"'
    # Anchor restored to `&not_address`; it had been mangled into a "¬"
    # character (the `&not` HTML entity) and no longer declared an anchor.
    - id: &not_address not_address
      usaddress: NotAddress
      # Double-quoted because the text contains an apostrophe.
      description: "A non-address component that doesn't refer to a recipient"
    - id: &occupancy_type occupancy_type
      usaddress: OccupancyType
      description: 'A type of occupancy within a building, e.g. "Suite", "Apt", "Floor"'
    - id: &occupancy_id occupancy_id
      usaddress: OccupancyIdentifier
      description: 'The identifier of an occupancy, often a number or letter'
    - id: &city_name city_name
      usaddress: PlaceName
      description: 'The name of the city'
    - id: &recipient recipient
      usaddress: Recipient
      description: 'A non-address recipient, e.g. the name of a person/organization'
    - id: &state_name state_name
      usaddress: StateName
      description: 'The name of the state'
    - id: &street_name street_name
      usaddress: StreetName
      description: 'The street name, excluding type & direction'
    - id: &street_name_pre_directional street_name_pre_directional
      usaddress: StreetNamePreDirectional
      description: 'A direction before a street name, e.g. "North", "S"'
    - id: &street_name_pre_modifier street_name_pre_modifier
      usaddress: StreetNamePreModifier
      description: 'A modifier before a street name that is not a direction, e.g. "Old"'
    - id: &street_name_pre_type street_name_pre_type
      usaddress: StreetNamePreType
      description: 'A street type that comes before a street name, e.g. "Route", "Ave"'
    - id: &street_name_post_directional street_name_post_directional
      usaddress: StreetNamePostDirectional
      description: 'A direction after a street name, e.g. "North", "S"'
    - id: &street_name_post_modifier street_name_post_modifier
      usaddress: StreetNamePostModifier
      description: 'A modifier after a street name, e.g. "Ext"'
    - id: &street_name_post_type street_name_post_type
      usaddress: StreetNamePostType
      description: 'A street type that comes after a street name, e.g. "Avenue", "Rd"'
    - id: &subaddress_id subaddress_id
      usaddress: SubaddressIdentifier
      description: 'The name/identifier of a subaddress component'
    - id: &subaddress_type subaddress_type
      usaddress: SubaddressType
      description: 'A level of detail in an address that is not an occupancy within a building, e.g. "Building", "Tower"'
    - id: &usps_box_group_id usps_box_group_id
      usaddress: USPSBoxGroupID
      description: 'The identifier of a USPS box group, usually a number'
    - id: &usps_box_group_type usps_box_group_type
      usaddress: USPSBoxGroupType
      description: 'A name for a group of USPS boxes, e.g. "R"'
    # NOTE(review): this id is `postal_box_id` while its siblings use the
    # `usps_box_` prefix. Left unchanged because consumers may look the part up
    # by this id - confirm before renaming.
    - id: &postal_box_id postal_box_id
      usaddress: USPSBoxID
      description: 'The identifier of a USPS box, usually a number'
    - id: &usps_box_type usps_box_type
      usaddress: USPSBoxType
      description: 'A USPS box, e.g. "P.O. Box"'
    - id: &zip_code zip_code
      usaddress: ZipCode
      description: 'The zip code'

  # Alternate address parts made up of multiple "standard" parts.
  # `parts` lists the standard part ids (by alias) that make up the derived part.
  derived:
    - id: &address_number_full address_number_full
      description: 'All address number parts combined, e.g. "123 1/2", "12 B"'
      parts:
        - *address_number_prefix
        - *address_number
        - *address_number_suffix
    - id: &street_name_full street_name_full
      description: 'All address name parts combined, e.g. "Pennsylvania Ave NW"'
      parts:
        - *street_name_pre_directional
        - *street_name_pre_modifier
        - *street_name_pre_type
        - *street_name
        - *street_name_post_type
        - *street_name_post_directional
        - *street_name_post_modifier
# Alternate parsing rulesets
#
# Each profile has an `id` and lists address parts by alias; the aliases
# resolve to the part ids anchored in `address_parts`.
profiles:
  - id: grasshopper
    # NOTE(review): presumably the parts that must be present for an address to
    # satisfy this profile - confirm against the code that consumes this file.
    required:
      - *address_number_full
      - *street_name_full
      - *state_name
      - *zip_code
    # TODO: "optional" may be useful if we start excluding all other parts
    # optional:
    #   - *city_name
    # TODO: Include "invalid" address parts for added validation
    # invalid:
    #   - *corner_of
    #   - *intersection_separator