Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
Finalize Hepburn transliterator #7
Browse files Browse the repository at this point in the history
  • Loading branch information
mbilbille committed Sep 1, 2012
1 parent 28298f6 commit 0585db0
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 18 deletions.
54 changes: 50 additions & 4 deletions src/JpnForPhp/Transliterator/Hepburn.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ class Hepburn implements RomanizationInterface
'ぢゃ' => 'ja', 'ぢゅ' => 'ju', 'ぢょ' => 'jo',
'びゃ' => 'bya', 'びゅ' => 'byu', 'びょ' => 'byo',
'ぴゃ' => 'pya', 'ぴゅ' => 'pyu', 'ぴょ' => 'pyo',
' ' => ' ', '' => ', ',
'んあ' => "n'a", 'んい' => "n'i", 'んう' => "n'u", 'んえ' => "n'e", 'んお' => "n'o",
'んや' => "n'ya", 'んゆ' => "n'yu", 'んよ' => "n'yo",
);

/**
Expand Down Expand Up @@ -88,7 +89,6 @@ class Hepburn implements RomanizationInterface
'ヂャ' => 'ja', 'ヂュ' => 'ju', 'ヂョ' => 'jo',
'ビャ' => 'bya', 'ビュ' => 'byu', 'ビョ' => 'byo',
'ピャ' => 'pya', 'ピュ' => 'pyu', 'ピョ' => 'pyo',
' ' => ' ', '' => ', ',
'イィ' => 'yi', 'イェ' => 'ye',
'ウァ' => 'wa', 'ウィ' => 'wi', 'ウゥ' => 'wu', 'ウェ' => 'we', 'ウォ' => 'wo',
'ウュ' => 'wya',
Expand Down Expand Up @@ -122,8 +122,18 @@ class Hepburn implements RomanizationInterface
'リェ' => 'rye',
'ラ゜' => 'la', 'リ゜' => 'li', 'ル゜' => 'lu', 'レ゜' => 'le', 'ロ゜' => 'lo',
'' => 'va', '' => 'vi', '' => 've', '' => 'vo',
'ンア' => "n'a", 'ンイ' => "n'i", 'ンウ' => "n'u", 'ンエ' => "n'e", 'ンオ' => "n'o",
);

/**
* @var array Map Japanese punctuation marks to their equivalent in latin
* alphabet.
*/
protected $mapPunctuationMarks = array(
' ' => ' ', '' => ', ',
);


/**
* Implements fromHiragana();
*
Expand All @@ -132,7 +142,10 @@ class Hepburn implements RomanizationInterface
public function fromHiragana($str)
{
    // Table-driven pass: hiragana syllables first, then punctuation marks.
    $romaji = strtr(strtr($str, $this->mapHiragana), $this->mapPunctuationMarks);

    // Post-processing on the romanized text, in this fixed order:
    // sokuon handling, then long vowels, then particles.
    $romaji = $this->transliterateSokuon($romaji);
    $romaji = $this->convertLongVowels($romaji);

    return $this->convertParticles($romaji);
}
Expand All @@ -145,6 +158,7 @@ public function fromHiragana($str)
public function fromKatakana($str)
{
$output = strtr($str, $this->mapKatakana);
$output = strtr($output, $this->mapPunctuationMarks);
$output = $this->transliterateSokuon($output, Transliterator::KATAKANA);
$output = $this->transliterateChoonpu($output);

Expand All @@ -160,7 +174,6 @@ public function __toString(){
return 'Hepburn romanization system (ヘボン式ローマ字)';
}


/**
* Transliterate Sokuon (http://en.wikipedia.org/wiki/Sokuon) character into
* its equivalent in latin alphabet.
Expand All @@ -181,7 +194,7 @@ protected function transliterateSokuon($str, $syllabary = Transliterator::HIRAGA

// As per Hepburn system ch > tch
// (http://en.wikipedia.org/wiki/Hepburn_romanization#Double_consonants)
return preg_replace('/cch/', 'tch', $output);
return str_replace('cch', 'tch', $output);
}

/**
Expand All @@ -204,4 +217,37 @@ protected function transliterateChoonpu($str)

return preg_replace('/(.)' . Transliterator::CHOONPU . '/ue', '$macrons[\'${1}\']', $str);
}

/**
 * Post-processing step that rewrites romanized long vowels with macrons.
 *
 * Deliberately minimal: the sequences "aa", "uu", "ee", "oo" and "ou" are
 * replaced wherever they occur, with no awareness of word boundaries. See:
 * - http://en.wikipedia.org/wiki/Hepburn_romanization#Long_vowels
 *
 * @param string $str Romanized string to be post-processed.
 *
 * @return string String with the sequences above replaced by ā, ū, ē, ō.
 */
protected function convertLongVowels($str)
{
    // Ordered map: each pair is applied to the result of the previous one,
    // so the order of the entries is significant (e.g. "uu" runs before "ou").
    $macrons = array(
        'aa' => 'ā',
        'uu' => 'ū',
        'ee' => 'ē',
        'oo' => 'ō',
        'ou' => 'ō',
    );

    foreach ($macrons as $doubled => $long) {
        $str = str_replace($doubled, $long, $str);
    }

    return $str;
}

/**
 * Post-processing step that rewrites romanized particles.
 *
 * Only stand-alone tokens are touched: "ha", "he" and "wo" must have a
 * single space on each side to be rewritten as "wa", "e" and "o".
 *
 * @param string $str Romanized string to be post-processed.
 *
 * @return string String with space-delimited particles rewritten.
 */
protected function convertParticles($str)
{
    // The surrounding spaces are part of each needle, so syllables inside
    // a word (e.g. the "ha" of "hana") are left alone.
    $particles = array(
        ' ha ' => ' wa ',
        ' he ' => ' e ',
        ' wo ' => ' o ',
    );

    foreach ($particles as $written => $spoken) {
        $str = str_replace($written, $spoken, $str);
    }

    return $str;
}
}
52 changes: 38 additions & 14 deletions tests/transliterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,44 +10,60 @@
'functions' => array(
'toRomaji' => array(
array(
'input' => array('まっちゃ'),
'input' => array('くるま'),
'expected' => 'kuruma',
),
array(
'input' => array('がっこう', NULL, new MyClass()),
'expected' => 'がっこう',
),
array(
'input' => array('がっこう', NULL, new Hepburn()),
'expected' => 'gakkō',
),
array(
'input' => array('おばあさん', NULL, new Hepburn()),
'expected' => 'obāsan',
),
array(
'input' => array('まっちゃ', NULL, new Hepburn()),
'expected' => 'matcha',
),
array(
'input' => array('けっか'),
'input' => array('けっか', NULL, new Hepburn()),
'expected' => 'kekka',
),
array(
'input' => array('マッチャ'),
'input' => array('マッチャ', NULL, new Hepburn()),
'expected' => 'matcha',
),
array(
'input' => array('ケッカ'),
'input' => array('ケッカ', NULL, new Hepburn()),
'expected' => 'kekka',
),
array(
'input' => array('タクシー'),
'input' => array('タクシー', NULL, new Hepburn()),
'expected' => 'takushī',
),
array(
'input' => array('パーティー'),
'input' => array('パーティー', NULL, new Hepburn()),
'expected' => 'pātī',
),
array(
'input' => array('サッカーをやる'),
'input' => array('サッカーをやる', NULL, new Hepburn()),
'expected' => 'sakkāwoyaru',
),
array(
'input' => array('サッカー を やる'),
'expected' => 'sakkā wo yaru',
'input' => array('サッカー を やる', NULL, new Hepburn()),
'expected' => 'sakkā o yaru',
),
array(
'input' => array('がっこう', NULL, new MyClass()),
'expected' => 'がっこう',
'input' => array('あんない', NULL, new Hepburn()),
'expected' => 'annai',
),
array(
'input' => array('がっこう', NULL, new Hepburn()),
'expected' => 'gakkō',
'input' => array('きんえん', NULL, new Hepburn()),
'expected' => "kin'en",
),
array(
'input' => array('がっこう', NULL, new Kunrei()),
Expand All @@ -59,7 +75,7 @@
),
array(
'input' => array('サッカー を やる', Transliterator::HIRAGANA),
'expected' => 'サッカー wo yaru',
'expected' => 'サッカー o yaru',
),
array(
'input' => array('サッカー を やる', Transliterator::KATAKANA),
Expand All @@ -71,6 +87,10 @@
'input' => array('gakkou', Transliterator::HIRAGANA),
'expected' => 'がっこう',
),
array(
'input' => array('obāsan', Transliterator::HIRAGANA),
'expected' => 'おばあさん',
),
array(
'input' => array('chakku', Transliterator::KATAKANA),
'expected' => 'チャック',
Expand All @@ -95,6 +115,10 @@
'input' => array('fôdo', Transliterator::KATAKANA),
'expected' => 'フォード',
),
array(
'input' => array('gakkō ni ikimasu', Transliterator::HIRAGANA),
'expected' => 'がっこう に いきます',
),
),
),
);
Expand Down

0 comments on commit 0585db0

Please sign in to comment.