Skip to content

Commit

Permalink
Detect default collation to be used during database creation
Browse files Browse the repository at this point in the history
  • Loading branch information
mneudert committed Sep 10, 2024
1 parent 1907ff7 commit 074ea55
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 16 deletions.
11 changes: 11 additions & 0 deletions core/Db/Schema.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@ private function getSchema(): SchemaInterface
return $this->schema;
}

/**
 * Looks up the default collation of the given character set.
 *
 * The lookup is delegated to the schema implementation returned by getSchema().
 *
 * @param string $charset character set name to look up
 * @return string collation name reported by the schema implementation
 */
public function getDefaultCollationForCharset(string $charset): string
{
    $schema = $this->getSchema();

    return $schema->getDefaultCollationForCharset($charset);
}

/**
* Get the table options to use for a CREATE TABLE statement.
*
Expand Down
28 changes: 27 additions & 1 deletion core/Db/Schema/Mysql.php
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,31 @@ public function supportsComplexColumnUpdates(): bool
return true;
}

/**
 * Returns the default collation for a charset.
 *
 * Queries the server with SHOW COLLATION for the collation that is flagged
 * as the default of the given character set.
 *
 * @param string $charset character set name, e.g. "utf8mb4"
 *
 * @return string name of the default collation
 * @throws Exception if the query yields no collation for the character set
 */
public function getDefaultCollationForCharset(string $charset): string
{
    // The SQL string literal is single-quoted on purpose: with the ANSI_QUOTES
    // sql mode enabled a double-quoted "Yes" is parsed as an identifier
    // instead of a string, which would break the query.
    $result = $this->getDb()->fetchRow(
        "SHOW COLLATION WHERE `Default` = 'Yes' AND `Charset` = ?",
        [$charset]
    );

    // isset() also covers the case where no row was returned at all
    if (!isset($result['Collation'])) {
        throw new Exception(sprintf(
            'Failed to detect default collation for character set "%s"',
            $charset
        ));
    }

    return $result['Collation'];
}

public function getDefaultPort(): int
{
return 3306;
Expand Down Expand Up @@ -770,8 +795,9 @@ public function getSupportedReadIsolationTransactionLevel(): string
protected function getDatabaseCreateOptions(): string
{
$charset = DbHelper::getDefaultCharset();
$collation = $this->getDefaultCollationForCharset($charset);

return "DEFAULT CHARACTER SET $charset";
return "DEFAULT CHARACTER SET $charset COLLATE $collation";
}

protected function getTableEngine()
Expand Down
26 changes: 12 additions & 14 deletions core/Db/Schema/Tidb.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

namespace Piwik\Db\Schema;

use Piwik\DbHelper;

/**
* TiDB schema
*/
Expand All @@ -27,6 +25,18 @@ public function supportsComplexColumnUpdates(): bool
return false;
}

/**
 * Returns the default collation for a charset.
 *
 * Uses the collation detected by the parent implementation, except that
 * a detected "utf8mb4_bin" for the "utf8mb4" charset is swapped for
 * "utf8mb4_0900_ai_ci".
 *
 * @param string $charset character set name to look up
 * @return string collation name to use for the charset
 */
public function getDefaultCollationForCharset(string $charset): string
{
    $detected = parent::getDefaultCollationForCharset($charset);
    $isBinaryUtf8mb4 = 'utf8mb4' === $charset && 'utf8mb4_bin' === $detected;

    // replace the TiDB default "utf8mb4_bin" with a better default
    return $isBinaryUtf8mb4 ? 'utf8mb4_0900_ai_ci' : $detected;
}

public function getDefaultPort(): int
{
return 4000;
Expand Down Expand Up @@ -78,16 +88,4 @@ public function getSupportedReadIsolationTransactionLevel(): string
// TiDB doesn't support READ UNCOMMITTED
return 'READ COMMITTED';
}

protected function getDatabaseCreateOptions(): string
{
$charset = DbHelper::getDefaultCharset();
$options = "DEFAULT CHARACTER SET $charset";

if ('utf8mb4' === $charset) {
$options .= ' COLLATE=utf8mb4_0900_ai_ci';
}

return $options;
}
}
9 changes: 9 additions & 0 deletions core/Db/SchemaInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,15 @@ public function addMaxExecutionTimeHintToQuery(string $sql, float $limit): strin
*/
public function supportsComplexColumnUpdates(): bool;

/**
* Returns the default collation for a charset used by this database engine.
*
* NOTE(review): the MySQL implementation throws an Exception when no default
* collation can be detected; confirm whether every implementation is expected
* to do the same and declare it here if so.
*
* @param string $charset name of the character set to look up
*
* @return string name of the default collation for the character set
*/
public function getDefaultCollationForCharset(string $charset): string;

/**
* Return the default port used by this database engine
*
Expand Down
15 changes: 14 additions & 1 deletion core/DbHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ public static function tableHasIndex($table, $indexName)
* @return string
* @throws Tracker\Db\DbException
*/
public static function getDefaultCharset()
public static function getDefaultCharset(): string
{
$result = Db::get()->fetchRow("SHOW CHARACTER SET LIKE 'utf8mb4'");

Expand All @@ -233,6 +233,19 @@ public static function getDefaultCharset()
return 'utf8mb4';
}

/**
 * Returns the default collation for a charset.
 *
 * Static convenience wrapper that forwards the lookup to the
 * Schema instance obtained from Schema::getInstance().
 *
 * @param string $charset character set name to look up
 *
 * @return string collation name reported by the schema
 * @throws Exception
 */
public static function getDefaultCollationForCharset(string $charset): string
{
    $schema = Schema::getInstance();

    return $schema->getDefaultCollationForCharset($charset);
}

/**
* Returns sql queries to convert all installed tables to utf8mb4
*
Expand Down
10 changes: 10 additions & 0 deletions tests/PHPUnit/Integration/Db/Schema/TidbTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ public function testOptimize()
$this->assertFalse($schema->optimizeTables(['table3', 'table4'], true));
}

/**
 * The TiDB schema is expected to report "utf8mb4_0900_ai_ci"
 * as the default collation for the "utf8mb4" charset.
 */
public function testGetDefaultCollationForCharsetReplacesUtf8mb4Binary(): void
{
    $collation = (new Db\Schema\Tidb())->getDefaultCollationForCharset('utf8mb4');

    self::assertSame('utf8mb4_0900_ai_ci', $collation);
}

/**
* @dataProvider getTableCreateOptionsTestData
*/
Expand Down
19 changes: 19 additions & 0 deletions tests/PHPUnit/Integration/DbHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,25 @@ public function testAddOriginHintToQuery()
self::assertEquals($expected, $result);
}

/**
 * The detected default collation must be prefixed with the charset name.
 */
public function testGetDefaultCollationForCharset(): void
{
    $charset = 'utf8mb4';

    // exact collation depends on the database used
    // but should always start with the charset
    self::assertStringStartsWith(
        $charset . '_',
        DbHelper::getDefaultCollationForCharset($charset)
    );
}

/**
 * Looking up a charset that yields no default collation must raise an exception.
 */
public function testGetDefaultCollationForCharsetThrowsForInvalidCharset(): void
{
    // expectations have to be registered before the call that throws
    $this->expectException(\Exception::class);
    $this->expectExceptionMessage('Failed to detect default collation for character set "invalid"');

    DbHelper::getDefaultCollationForCharset('invalid');
}

private function assertDbExists($dbName)
{
$dbs = Db::fetchAll("SHOW DATABASES");
Expand Down

0 comments on commit 074ea55

Please sign in to comment.