Skip to content

Commit

Permalink
Merge pull request #954 from openzim/ISNIT0/parsoid-slash-rewriting
Browse files Browse the repository at this point in the history
🐛 rewrite '/'s in urls for Parsoid articles, not just MCS
  • Loading branch information
kelson42 authored Aug 29, 2019
2 parents 544dff7 + 78d7bdd commit 79128ab
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 7 deletions.
14 changes: 7 additions & 7 deletions src/util/rewriteUrls.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,14 @@ export async function rewriteUrl(articleId: string, mw: MediaWiki, dump: Dump, l
}
} else { // This is MediaWiki HTML
await removeLinksToUnmirroredArticles(mw, dump, linkNode, href);
}

if (articleId.includes('/')) {
const href = linkNode.getAttribute('href').replace(/ /g, '_'); // href is modified above, so this is necessary
const resourceNamespace = 'A';
const slashesInUrl = articleId.split('/').length - 1;
const upStr = '../'.repeat(slashesInUrl + 1);
linkNode.setAttribute('href', `${upStr}${resourceNamespace}/${href}`);
}
if (articleId.includes('/')) {
const href = linkNode.getAttribute('href').replace(/ /g, '_'); // href is modified above, so this is necessary
const resourceNamespace = 'A';
const slashesInUrl = articleId.split('/').length - 1;
const upStr = '../'.repeat(slashesInUrl + 1);
linkNode.setAttribute('href', `${upStr}${resourceNamespace}/${href}`);
}
}
return { mediaDependencies };
Expand Down
62 changes: 62 additions & 0 deletions test/e2e/extra.e2e.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import test from 'blue-tape';
import { execute } from '../../src/mwoffliner.lib';
import { zimcheckAvailable, zimcheck } from '../util';
import rimraf from 'rimraf';
import { execPromise, writeFilePromise, mkdirPromise } from '../../src/util';
import { join } from 'path';
// import { ZimReader } from '@openzim/libzim';
// tslint:disable-next-line: no-var-requires
require('dotenv').config();

// Timestamp taken once at module load; its epoch-millisecond value makes the
// scratch directory name unique per run.
const now = new Date();

// Scratch directory for this test run, created under the current working directory.
const testId = join(process.cwd(), 'mwo-test-' + String(now.getTime()));

// Path of the article list file the test writes inside the scratch directory.
const articleListUrl = join(testId, '/articleList');

/**
 * End-to-end scrape of en.wikipedia.org restricted to a three-entry article
 * list, using a customMainPage whose title contains a '/'.
 *
 * Verifies that exactly one output is produced, that the `nopic` dump reports
 * four successfully scraped articles, that the resulting file passes zimcheck
 * (when zimcheck is installed), and that Redis is empty afterwards.
 *
 * Registered with plain `test` rather than `test.only`, so other tests loaded
 * into the same process are not skipped.
 */
test('Simple customMainPage', async (t) => {
    // Wipe Redis before scraping.
    await execPromise(`redis-cli flushall`);
    await mkdirPromise(testId);

    try {
        const articleListLines = `
Book:Cancer
Book:Ears_nose_throat
Book:Eye_diseases`;

        await writeFilePromise(articleListUrl, articleListLines, 'utf8');

        const outFiles = await execute({
            mwUrl: `https://en.wikipedia.org`,
            // NOTE(review): this value looks like an e-mail-obfuscation placeholder
            // rather than a real address — confirm against the repository.
            adminEmail: `[email protected]`,
            articleList: articleListUrl,
            customMainPage: 'Wikipedia:WikiProject_Medicine/Open_Textbook_of_Medicine2',
            outputDirectory: testId,
            redis: process.env.REDIS,
            format: ['nopic'],
        });

        t.equal(outFiles.length, 1, `Created 1 outputs`);

        for (const dump of outFiles) {
            if (dump.nopic) {
                // Presumably the three listed articles plus the custom main page — TODO confirm.
                t.equal(dump.status.articles.success, 4, 'nopic has 4 articles');
            }

            if (await zimcheckAvailable()) {
                try {
                    await zimcheck(dump.outFile);
                    t.ok(true, `Zimcheck passes`);
                } catch (err) {
                    // Surface the reason instead of silently discarding it.
                    console.log(`Zimcheck failed: ${err instanceof Error ? err.message : String(err)}`);
                    t.ok(false, `Zimcheck passes`);
                }
            } else {
                console.log(`Zimcheck not installed, skipping test`);
            }
        }

        t.ok(true, 'Scraped customMainPage');
    } finally {
        // Remove the scratch directory even when the scrape or an assertion throws.
        rimraf.sync(testId);
    }

    // The scan is expected to print nothing, i.e. no keys are left in Redis.
    const redisScan = await execPromise(`redis-cli --scan`);
    t.equal(redisScan, '', 'Redis has been cleared');
});

0 comments on commit 79128ab

Please sign in to comment.