Skip to content
This repository has been archived by the owner on Dec 7, 2024. It is now read-only.

Commit

Permalink
Update slash encoding to use # rather than |
Browse files Browse the repository at this point in the history
When a subpage is exported, or the namespace of an exported page contains a
`/`, PagePort previously put the content of the page in a file where the `/`
was replaced with a `|`, so that extra slashes in page names or namespaces
would not result in increasing levels of sub directories in the package output.

The `|` was presumably chosen to ensure that the encoded name would not
conflict with any real page names, since `|` is not supported for page titles in
MediaWiki. However, `|` is also not supported in file names on Windows
machines. Instead, encode the `/` with a `#`, which is likewise not allowed in
MediaWiki page titles, but *is* allowed in Windows file names.

While the behavior for new exports is to use `#`, any existing export that uses
`|` can still be imported, and a test case is added to verify that backwards
compatibility is not broken.

The default version of packages exported is now 0.2 rather than 0.1 to allow
packages without explicit versions to be identified as coming after this fix.

Additionally, split `PagePortTest::testImport()` to avoid deleting the pages
after exporting; it seems that PHPUnit only runs `setUp()` *once* for each test
method, even if a test method uses a data provider. Thus, the deletion at the
end of the first execution of `testImport()` was breaking the export of the
same pages in the second execution that was added. Instead, add a separate
`PagePortTest::testDelete()` test.

SEL-1609
  • Loading branch information
DanielWTQ committed Oct 14, 2024
1 parent 65cb690 commit c0a3866
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 15 deletions.
28 changes: 18 additions & 10 deletions includes/PagePort.php
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,17 @@ private function getPagesFromDir( $dir ): array {
$namespace = basename( $dir );
$name = $l;

if ( strpos( $namespace, '|' ) !== false ) {
$namespace = str_replace( '|', '/', $namespace );
}
$namespace = str_replace( '#', '/', $namespace );
# Legacy handling for exports that used | rather than #, which
# was changed for windows support in SEL-1609
$namespace = str_replace( '|', '/', $namespace );

$name = str_replace( '.mediawiki', '', $name );
if ( strpos( $name, '|' ) !== false ) {
$name = str_replace( '|', '/', $name );
}
$name = str_replace( '#', '/', $name );
# Legacy handling for exports that used | rather than #, which
# was changed for windows support in SEL-1609
$name = str_replace( '|', '/', $name );

$fulltitle = $namespace . ':' . $name;
// Clean up the Main namespace from the title
$fulltitle = str_replace( 'Main:', '', $fulltitle );
Expand Down Expand Up @@ -228,15 +232,19 @@ public function export( array $pages, string $root, bool $save = true ) {
continue;
}
if ( strpos( $namespaceName, '/' ) !== false ) {
$namespaceName = str_replace( '/', '|', $namespaceName );
// Used to be replaced with a |, now # for windows support,
// SEL-1609
$namespaceName = str_replace( '/', '#', $namespaceName );
}
$contentObj = $this->wikiPageFactory->newFromTitle( $title )->getContent();
$content = $contentObj->getWikitextForTransclusion();
if ( $save && !file_exists( $root . '/' . $namespaceName ) ) {
mkdir( $root . '/' . $namespaceName );
}
if ( strpos( $filename, '/' ) !== false ) {
$filename = str_replace( '/', '|', $filename );
// Used to be replaced with a |, now # for windows support,
// SEL-1609
$filename = str_replace( '/', '#', $filename );
}
$targetFileName = $root . '/' . $namespaceName . '/' . $filename;
if ( $contentObj->getModel() === CONTENT_MODEL_WIKITEXT ) {
Expand Down Expand Up @@ -342,7 +350,7 @@ public function exportJSON(
$packageName => [
"globalID" => str_replace( ' ', '.', $packageName ),
"description" => $packageDesc,
"version" => $version ?: '0.1',
"version" => $version ?: '0.2',
"pages" => [],
"requiredExtensions" => []
]
Expand All @@ -352,7 +360,7 @@ public function exportJSON(
foreach ( $pages as $page ) {
$title = Title::newFromText( $page );
$name = $title->getText();
$escapedName = str_replace( '/', '|', $name );
$escapedName = str_replace( '/', '#', $name );
$namespace = $this->getNamespaceByValue( $title->getNamespace() );
// PagePort can't handle deprecated NS_IMAGE
if ( $namespace === "NS_IMAGE" ) {
Expand Down
64 changes: 59 additions & 5 deletions tests/phpunit/PagePortTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,9 @@ public function testExportPages(): void {
$result[array_keys( $result )[2]],
'page contents with wiki templates are exported correctly'
);
// Subpages use # instead of | for windows support, SEL-1609
$this->assertEquals(
'testRoot/Main/Page4Test|SubPage1Test.mediawiki',
'testRoot/Main/Page4Test#SubPage1Test.mediawiki',
array_keys( $result )[3],
'file root for subpages is calculated correctly'
);
Expand All @@ -147,8 +148,9 @@ public function testExportPages(): void {
array_keys( $result )[7],
'file root for pages with extra namespace is calculated correctly'
);
// Subpages use # instead of | for windows support, SEL-1609
$this->assertEquals(
'testRoot/CustomNamespace|With|Slashes/Page8Test.mediawiki',
'testRoot/CustomNamespace#With#Slashes/Page8Test.mediawiki',
array_keys( $result )[8],
'file root for pages with extra namespace with slashes is calculated correctly'
);
Expand Down Expand Up @@ -309,9 +311,9 @@ public function testGetNamespaceByValue( $input, $expected ) {
/**
* @covers PagePort::import
* @covers PagePort::getAllPages
* @covers PagePort::delete
* @dataProvider provideTestImport
*/
public function testImport() {
public function testImport( callable $callback = null ) {
$tempDir = $this->tempdir( 'pageprot_' );
$pages = [
'Page1Test',
Expand All @@ -320,11 +322,15 @@ public function testImport() {
'Page4Test/SubPage1Test',
'Template:Page5Test',
'File:Page6Test',
'CustomNamespace/With/Slashes:Page8Test',
'Page with spaces',
'MediaWiki:Common.css',
'MediaWiki:Example.js',
];
$this->pp->export( $pages, $tempDir );
if ( $callback !== null ) {
$callback( $pages, $tempDir );
}
foreach ( $pages as $page ) {
$title = Title::newFromText( $page );
$wp = $this->wikiPageFactory->newFromTitle( $title );
Expand All @@ -347,10 +353,58 @@ public function testImport() {
foreach ( $pages as $p ) {
$this->assertContains( $p, $allPages );
}
}

/**
 * Data provider for testImport().
 *
 * Each dataset holds a single element: either null (import the export
 * as-is) or a callable taking ( $pages, $tempDir ) that is run between
 * the export and the import to alter the exported files on disk.
 */
public static function provideTestImport() {
	// Rewrites a fresh export into the legacy on-disk layout, where `/` was
	// encoded as `|` instead of `#`. The export logic no longer produces
	// such names, so the affected entries are renamed by hand to check
	// that packages exported before the change can still be imported.
	$toLegacyLayout = static function ( $pages, $tempDir ) {
		// The only exported entries whose names contain an encoded `/`:
		// - Page4Test/SubPage1Test (a file in the main namespace)
		// - CustomNamespace/With/Slashes:Page8Test (the directory of
		//   custom namespace 4002)
		$legacyNames = [
			"$tempDir/Main/Page4Test#SubPage1Test.mediawiki" =>
				"$tempDir/Main/Page4Test|SubPage1Test.mediawiki",
			"$tempDir/CustomNamespace#With#Slashes" =>
				"$tempDir/CustomNamespace|With|Slashes",
		];
		foreach ( $legacyNames as $current => $legacy ) {
			rename( $current, $legacy );
		}
	};

	yield 'New export' => [ null ];
	yield 'Old export' => [ $toLegacyLayout ];
}

/**
 * Checks that delete() removes from the wiki every page that is present
 * in an exported directory.
 *
 * @covers PagePort::delete
 */
public function testDelete() {
	$tempDir = $this->tempdir( 'pageprot_' );
	$pages = [
		'Page1Test',
		'Page2Test',
		'Page3Test',
		'Page4Test/SubPage1Test',
		'Template:Page5Test',
		'File:Page6Test',
		'Page with spaces',
		'MediaWiki:Common.css',
		'MediaWiki:Example.js',
	];
	// Export to have a copy of the list of pages in a directory, because
	// delete() is given a directory of the pages that should be deleted
	// from the wiki
	$this->pp->export( $pages, $tempDir );

	// Precondition: every page is listed before the deletion, so the
	// "not contained" assertions below cannot pass vacuously
	$allPages = $this->pp->getAllPages();
	foreach ( $pages as $p ) {
		$this->assertContains( $p, $allPages, "Page $p is included in getAllPages()" );
	}

	$this->pp->delete( $tempDir );

	// One assertion per page, with a message naming the page that failed
	$allPages = $this->pp->getAllPages();
	foreach ( $pages as $p ) {
		$this->assertNotContains( $p, $allPages, "Page $p was deleted" );
	}
}

Expand Down

0 comments on commit c0a3866

Please sign in to comment.