Skip to content

Commit

Permalink
autofix wikidata redirects
Browse files Browse the repository at this point in the history
  • Loading branch information
k-yle committed Feb 28, 2024
1 parent 7bb8393 commit 5844222
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 10 deletions.
4 changes: 4 additions & 0 deletions src/core/general.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import type { Tags } from 'pbf2json';
import { TOP_LEVEL_TAGS } from '../data';

/** Maps the one-letter prefix of an OSM ID to the element type used in osm.org URLs. */
const MAP = { n: 'node', w: 'way', r: 'relation' };

/**
 * Turns a prefixed OSM ID (e.g. `n123`, `w456`, `r789`) into a link to that
 * element on osm.org.
 *
 * The first character selects the element type via `MAP`; everything after
 * it is used verbatim as the numeric part of the URL. An unrecognised prefix
 * is not rejected - it ends up as `undefined` in the URL.
 */
export const osmIdToLink = (osmId: string) => {
  const prefix = osmId[0] as keyof typeof MAP;
  const numericId = osmId.slice(1);
  return `https://osm.org/${MAP[prefix]}/${numericId}`;
};

export const findTopLevelTags = (tags: Tags) =>
TOP_LEVEL_TAGS.filter((tag) => {
if (tag.includes('~')) {
Expand Down
80 changes: 80 additions & 0 deletions src/stage3-conflate/checkWikidataRedirects.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import { USER_AGNET, createDiamond, osmIdToLink } from '../core';
import type { OsmPatchFile } from '../types';
import { wikidataErrors } from './compareFeatures/compareFeatures';

/** Where the generated osmPatch file with the wikidata redirect fixes is written. */
const outputFilePath = join(
  __dirname,
  '..',
  '..',
  'out',
  'wikidata-redirects.osmPatch.geo.json',
);

/**
 * The `entities` object of a wikidata `wbgetentities` response, keyed by QID.
 * Only the fields that this file reads are declared.
 */
type WikidataApiResponse = Record<
  string,
  {
    id: string;
    /** set when the entity is a redirect (`from` is the old QID, `to` the new one) */
    redirects?: { from: string; to: string };
  }
>;

/**
 * Checks every wikidata tag that was recorded in `wikidataErrors` to see
 * whether the QID currently in OSM is just a redirect to another wikidata item.
 *
 * - QIDs that turn out to be redirects produce one `edit` feature per affected
 *   OSM element, replacing the `wikidata` tag with the redirect target.
 * - Everything else (not a redirect, or not a well-formed QID) is logged with
 *   `console.error` so that it can be fixed by hand.
 *
 * The resulting patch file is written to `outputFilePath` and also returned.
 *
 * @returns the osmPatch file containing the redirect fixes
 * @throws if a request to the wikidata API fails at the HTTP level, or if the
 *   patch file cannot be written
 */
export async function checkWikidataRedirects(): Promise<OsmPatchFile> {
  console.log(
    `Checking ${wikidataErrors.length} invalid wikidata tags for redirects…`,
  );

  // Several OSM elements can carry the same stale QID, so every error is kept
  // per QID. A plain QID -> error lookup would silently drop all but one.
  const byOldQId = new Map<string, (typeof wikidataErrors)[number][]>();
  for (const error of wikidataErrors) {
    const group = byOldQId.get(error.actual);
    if (group) group.push(error);
    else byOldQId.set(error.actual, [error]);
  }

  /** logs every OSM element tagged with `qId` as needing a manual fix */
  const reportUnfixable = (qId: string) => {
    for (const error of byOldQId.get(qId) ?? []) {
      console.error(
        `(!) Expected ${error.expected} on ${osmIdToLink(error.osmId)}`,
      );
    }
  };

  // Tag values come straight from OSM and may not be a single QID
  // (e.g. `Q1;Q2`). NOTE(review): the API is expected to reject the whole
  // request if any ID is malformed, so those are reported up-front instead of
  // being allowed to sink a batch of otherwise valid IDs.
  const wellFormedQId = /^Q\d+$/;
  const qIdsToFetch: string[] = [];
  for (const qId of byOldQId.keys()) {
    if (wellFormedQId.test(qId)) qIdsToFetch.push(qId);
    else reportUnfixable(qId);
  }

  const result: WikidataApiResponse = {};

  // NOTE(review): presumably chosen to stay below the API's per-request ID limit - confirm
  const chunkSize = 49;
  for (let index = 0; index < qIdsToFetch.length; index += chunkSize) {
    const chunk = qIdsToFetch.slice(index, index + chunkSize);

    const url = new URL('https://wikidata.org/w/api.php');
    // URLSearchParams takes care of escaping the query string
    url.search = new URLSearchParams({
      action: 'wbgetentities',
      format: 'json',
      ids: chunk.join('|'),
    }).toString();

    const response = await fetch(url, {
      headers: { 'User-Agent': USER_AGNET },
    });
    if (!response.ok) {
      throw new Error(
        `Wikidata API request failed: ${response.status} ${response.statusText}`,
      );
    }

    const chunkResult: { entities?: WikidataApiResponse; error?: unknown } =
      await response.json();

    if (!chunkResult.entities) {
      // API-level failure for this batch: make it visible, but keep going so
      // that the remaining batches can still be processed.
      console.error(
        `(!) Wikidata API returned no entities for ${chunk.join(', ')}`,
        chunkResult.error,
      );
      continue;
    }

    Object.assign(result, chunkResult.entities);
  }

  const features: OsmPatchFile['features'] = [];
  for (const entity of Object.values(result)) {
    if (!entity.redirects) {
      // the tag is wrong for some reason other than a redirect
      reportUnfixable(entity.id);
      continue;
    }

    const { from, to } = entity.redirects;
    for (const error of byOldQId.get(from) ?? []) {
      features.push({
        type: 'Feature',
        id: error.osmId,
        geometry: createDiamond(error.lat, error.lng),
        properties: {
          __action: 'edit',
          wikidata: to,
        },
      });
    }
  }

  const patchFile: OsmPatchFile = {
    type: 'FeatureCollection',
    features,
    size: 'large',
    stats: {} as never,
    changesetTags: {
      comment: 'update wikidata tags which point to redirect pages',
    },
  };

  await fs.writeFile(outputFilePath, JSON.stringify(patchFile, null, 2));

  return patchFile;
}
21 changes: 16 additions & 5 deletions src/stage3-conflate/compareFeatures/compareFeatures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ const DISTANCE_APART_THRESHOLD_NODE = 2500;
// this one is slightly higher since the centroid of the area might be quite far from the NZGB point
const DISTANCE_APART_THRESHOLD_AREA = 15_000;

export const wikidataErrors: string[] = [];
/** One OSM feature whose `wikidata` tag does not match the expected QID. */
export type WikidataErrors = {
  /** ID of the OSM feature that carries the wrong tag */
  osmId: string;
  /** value currently in the feature's `wikidata` tag */
  actual: string;
  /** QID of the matching NZGB place */
  expected: string;
  /** coordinates of the NZGB place */
  lat: number;
  lng: number;
};

/** Filled in by `compareFeatures` whenever it finds a mismatching wikidata tag. */
export const wikidataErrors: WikidataErrors[] = [];

/** compares the OSM place with the NZGB place and returns a list of issues */
export function compareFeatures(
Expand Down Expand Up @@ -147,9 +154,13 @@ export function compareFeatures(
if (osm.tags.wikidata) {
// abort and don't touch the feature if there appears to be duplicate entries in wikidata
// Fixing this data issue may require editing or merging wikidata items.
wikidataErrors.push(
`(!) Wikidata tag is wrong on ${osm.osmId} (${osm.tags.wikidata}), should be ${nzgb.qId}`,
);
wikidataErrors.push({
osmId: osm.osmId,
expected: nzgb.qId,
actual: osm.tags.wikidata,
lat: nzgb.lat,
lng: nzgb.lng,
});
return undefined;
}

Expand All @@ -176,7 +187,7 @@ export function compareFeatures(
if (existingRef !== ref) {
// abort and don't touch the feature if someone has already tagged it with a different ref
// in the source:name tag
wikidataErrors.push(
console.error(
`(!) Incorrect source:name tag on ${osm.osmId}, should be “${ref}”`,
);
return undefined;
Expand Down
10 changes: 5 additions & 5 deletions src/stage3-conflate/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ import {
osmPathFilePath,
tempOsmFile,
} from '../core';
import {
compareFeatures,
wikidataErrors,
} from './compareFeatures/compareFeatures';
import { compareFeatures } from './compareFeatures/compareFeatures';
import { findMatch } from './findMatch';
import { getPresetTags } from './getPresetTags';
import { checkWikidataRedirects } from './checkWikidataRedirects';

// baseline: this took 120sec on the very first run (1k refs in the planet)
function processOneType(
Expand Down Expand Up @@ -290,9 +288,11 @@ async function main() {
}
}

console.log(wikidataErrors.join('\n'));
extraLayersObject['ZZ Wikidata Redirects'] = await checkWikidataRedirects();

await fs.writeFile(nzgbIndexPath, JSON.stringify(statsObject, null, 2));
await fs.writeFile(extraLayersFile, JSON.stringify(extraLayersObject));

console.log('Done');
}
main();

0 comments on commit 5844222

Please sign in to comment.