-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathosm.js
364 lines (342 loc) · 13.4 KB
/
osm.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
// Everything to do with downloading the latest osm.pbf data from openstreetmap,
// extracting street names, parsing street names and saving the output locally.
import { Transform } from "stream"
import fs from 'fs-extra'
import osm_parser from 'osm-pbf-parser'
import stringify from "json-stringify-pretty-compact"
import * as R from "ramda"
import { equivalentStreet } from "./equivalents.js"
import { stripAffixes } from "./affixes.js"
import { personsCountryFile, personsCountryRead, personsCountryWrite} from "./persons.js"
import { fileLocation, read, then } from "./pills.js"
export const osmDownload = (country) => R.pipe(
countryRegion,
region => `http://download.geofabrik.de/${region}/${country}-latest.osm.pbf`,
fetch,
then(v => v.arrayBuffer()),
then(Buffer.from),
then(buffer => fs.createWriteStream(osmPbfFile(country)).write(buffer)),
)(country)
// Explore the osm data. I've used this to explore what osm fields contain
// street data.
export const osmInspect = (country, regex) => fs.createReadStream(osmPbfFile(country))
.on("open", () => fs.writeFileSync(osmInspectFile(country), "create new file"))
.pipe(new osm_parser())
.pipe(new Transform({
objectMode: true,
transform: (chunk, _encoding, callback) =>
R.pipe(
R.map(JSON.stringify),
R.filter(R.test(new RegExp(regex, "i"))),
R.map(JSON.parse),
// Ignore non-interesting fields
R.map(R.omit(["id", "lat", "lon", "info", "refs", "members"])),
out => out.length > 0 ?
fs.appendFileSync(osmInspectFile(country), JSON.stringify(out, null, 2))
: "ignore",
() => callback(null, null)
)(chunk)
}))
export const osmExtract = (country) => {
const streets = fs.createWriteStream(osmRawFile(country))
// Add metadata; only the osm last modified date, for now
streets.write(`[["${osmPbfFileDate(country)}"]`)
fs.createReadStream(osmPbfFile(country))
.pipe(new osm_parser())
.pipe(new Transform({
objectMode: true,
transform: (chunk, _encoding, callback) =>
R.pipe(
// Keep only pbf entries of type `node` and `way`. If other pbf entries
// are found to contain relevant street name data, include them here.
R.filter(R.compose(
R.includes(R.__, ["node", "way"]),
R.prop("type")
)),
// Extract the `tags` key. If it exists, this key contains the street
// names. If not, ignore this entry.
R.filter(R.prop("tags")),
R.map(R.prop("tags")),
// There are multiple keys that contain the city+street name duo. Check
// all known combinations on each entry and keep the ones that are valid.
// If, in the future, other combinations are found to contain relevant
// street name data, include them here.
R.chain(R.juxt([
R.props(["is_in:city", "name"]),
R.props(["addr:city", "addr:street"])
])),
R.reject(R.any(R.isNil)),
// Pass the transformed chunk to the next stage.
out => callback(null, out)
)(chunk)
}))
.on("data", R.pipe(
R.uniqBy(R.join("-")),
R.map(s => streets.write(",\n" + stringify(s)))
))
.on("finish", () => {
streets.write("]")
streets.close()
})
}
export const osmParse = (country) => R.pipe(
R.compose(read, osmRawFile),
// Skip the osm modified date
R.tail,
// Strip affixes and replace with equivalents; keep city unchanged
R.map(R.adjust(1, R.compose(equivalentStreet(country), stripAffixes(country)))),
// Even without the previous step, but moreso after it, there will be
// identical [city, name] entries. I've decided to allow only a single
// eponym per city, even though there might be eponyms for streets, squares
// or other landmarks representing the same person, all in the same city.
// This is a fair defense against misspelled streets or ones tagged multiple
// times with slightly different affixes or equivalents, all in the same
// city. Counting all these would be a mistake. Still, in some instances,
// the city names themselves are spelled differently (with or without
// cedilla, for example).
R.uniqBy(R.join("-")),
// The city name has served its purpose, discard it.
R.map(R.prop(1)),
// Count identical street names.
R.groupBy(R.identity),
R.mapObjIndexed(R.length),
// Transform to [streetName, count] array
R.toPairs,
// Remove streets composed of numbers, letters or other names less than 3
// characters to reduce the output file size; most than likely these do not
// designate persons. Note: it might not apply for China/Korea/Taiwan/etc.
R.reject(R.compose(
R.gte(3),
R.length,
R.prop(0))
),
// Protect against garbage entries and very long files (lots of streets)
R.reject(R.compose(
R.gt(minStreetFrequency(country)),
R.prop(1))
),
// Sort by most frequent street names first.
R.sortWith([R.descend(R.prop(1))]),
hydrateStreets(country),
// Add back the osm modified date
R.prepend(R.compose(R.head, read, osmRawFile)(country)),
personsCountryWrite(country),
)(country)
// Add previously available wiki links, when available, for country streets
const hydrateStreets = country => streets =>
fs.existsSync(personsCountryFile(country)) ?
R.pipe(
personsCountryRead,
prevStreets =>
R.map(entry =>
R.pipe(
R.find(R.compose(R.equals(R.head(entry)), R.head)),
// Add the existing link, if it exists.
prevStreet => R.append(prevStreet ? decodeURI(prevStreet[2]) : "", entry)
)(prevStreets),
streets)
)(country)
: streets
// Downloaded osm pbf file
export const osmPbfFile = fileLocation("data/osm/pbf", ".osm.pbf")
// Temporary file used to explore the contents of the pbf file.
export const osmInspectFile = fileLocation("data/osm/inspect", ".json")
// Extracted but not processed output file for country
export const osmRawFile = fileLocation("data/osm/raw", ".json")
// Get the file modified date from the OS; good enough to get a glimpse of the
// freshness of osm data; the alternative is to extract the modified date from
// the osm file (more complicated)
export const osmPbfFileDate = country => R.pipe(
osmPbfFile,
fs.statSync,
f => f.mtime.toDateString().substring(4),
)(country)
// Minimum frequency for a street name below which we ignore the said street.
export const minStreetFrequency = country => R.pipe(
// All country names and street frequencies, for all regions
() => R.pipe(
R.chain(R.tail),
R.reduce(R.concat, [])
)(regions),
R.find(R.compose(R.equals(country), R.prop(0))),
R.prop(1),
)()
// In what region is the `country` in
export const countryRegion = (country) => R.pipe(
R.find(R.find(R.find(c => c[0] === country))),
R.head
)(regions)
// All regions and countries as they appear in the openstreetmap (osm) database,
// http://download.geofabrik.de/
const regions = [
["africa", [
["algeria", 3],
["angola", 2],
["benin", 2],
["botswana", 2],
["burkina-faso", 2],
["burundi", 2],
["cameroon", 2],
["canary-islands", 2],
["central-african-republic", 2],
["chad", 2],
["congo-brazzaville", 2],
["congo-democratic-republic", 2],
["egypt", 3],
["ethiopia", 2],
["ghana", 2],
["guinea", 2],
["ivory-coast", 2],
["kenya", 2],
["liberia", 2],
["libya", 2],
["madagascar", 2],
["malawi", 2],
["mali", 2],
["morocco", 2],
["mozambique", 2],
["namibia", 2],
["nigeria", 2],
["senegal-and-gambia", 2],
["south-africa", 2],
["sudan", 2],
["tanzania", 3],
["togo", 2],
["tunisia", 2],
["uganda", 2],
["zambia", 2],
["zimbabwe", 2],
]],
["central-america", [
["belize", 2],
["costa-rica", 2],
["cuba", 2],
["el-salvador", 2],
["guatemala", 2],
["haiti-and-domrep", 2],
["honduras", 2],
["jamaica", 2],
["nicaragua", 2],
["panama", 2],
]],
["north-america", [
["canada", 3],
["greenland", 2],
["mexico", 3],
["us", 3],
]],
["south-america", [
["argentina", 3],
["bolivia", 2],
["brazil", 3],
["chile", 3],
["colombia", 2],
["ecuador", 2],
["guyana", 2],
["paraguay", 2],
["peru", 2],
["suriname", 2],
["uruguay", 3],
["venezuela", 2],
]],
["asia", [
["afghanistan", 2],
["armenia", 3],
["azerbaijan", 2],
["bangladesh", 3],
["bhutan", 2],
["cambodia", 2],
["china", 3],
["gcc-states", 3],
["india", 3],
["indonesia", 3],
["iran", 3],
["iraq", 3],
["israel-and-palestine", 3],
["japan", 3],
["jordan", 3],
["kazakhstan", 3],
["kyrgyzstan", 2],
["laos", 2],
["lebanon", 2],
["malaysia-singapore-brunei", 3],
["mongolia", 3],
["myanmar", 3],
["nepal", 3],
["north-korea", 3],
["pakistan", 3],
["philippines", 3],
["russia", 3],
["south-korea", 3],
["sri-lanka", 3],
["syria", 3],
["taiwan", 3],
["tajikistan", 2],
["thailand", 3],
["turkmenistan", 3],
["uzbekistan", 3],
["vietnam", 3],
["yemen", 2],
]],
["europe", [
["albania", 2],
["andorra", 2],
["austria", 3],
["azores", 2],
["belarus", 3],
["belgium", 2],
["bosnia-herzegovina", 2],
["bulgaria", 3],
["croatia", 3],
["cyprus", 2],
["czech-republic", 3],
["denmark", 3],
["estonia", 3],
["faroe-islands", 2],
["finland", 3],
["france", 3],
["georgia", 3],
["germany", 3],
["great-britain", 3],
["greece", 3],
["guernsey-jersey", 2],
["hungary", 2],
["iceland", 2],
["ireland-and-northern-ireland", 2],
["isle-of-man", 2],
["italy", 3],
["kosovo", 2],
["latvia", 3],
["liechtenstein", 2],
["lithuania", 3],
["luxembourg", 2],
["macedonia", 2],
["malta", 2],
["moldova", 2],
["monaco", 2],
["montenegro", 2],
["netherlands", 3],
["norway", 3],
["poland", 3],
["portugal", 3],
["romania", 2],
["russia", 2],
["serbia", 2],
["slovakia", 2],
["slovenia", 3],
["spain", 3],
["sweden", 3],
["switzerland", 3],
["turkey", 3],
["ukraine", 3],
]],
["australia-oceania", [
["american-oceania", 2],
["australia", 3],
["fiji", 2],
["new-caledonia", 2],
["new-zealand", 3],
["papua-new-guinea", 2],
["polynesie-francaise", 2],
]]
]