diff --git a/src/JpnForPhp/Transliterator/Hepburn.php b/src/JpnForPhp/Transliterator/Hepburn.php
index cebd6e1..9673113 100644
--- a/src/JpnForPhp/Transliterator/Hepburn.php
+++ b/src/JpnForPhp/Transliterator/Hepburn.php
@@ -51,7 +51,8 @@ class Hepburn implements RomanizationInterface
         'ぢゃ' => 'ja', 'ぢゅ' => 'ju', 'ぢょ' => 'jo',
         'びゃ' => 'bya', 'びゅ' => 'byu', 'びょ' => 'byo',
         'ぴゃ' => 'pya', 'ぴゅ' => 'pyu', 'ぴょ' => 'pyo',
-        ' ' => ' ', '、' => ', ',
+        'んあ' => "n'a", 'んい' => "n'i", 'んう' => "n'u", 'んえ' => "n'e", 'んお' => "n'o",
+        'んや' => "n'ya", 'んゆ' => "n'yu", 'んよ' => "n'yo",
     );
 
     /**
@@ -88,7 +89,6 @@ class Hepburn implements RomanizationInterface
         'ヂャ' => 'ja', 'ヂュ' => 'ju', 'ヂョ' => 'jo',
         'ビャ' => 'bya', 'ビュ' => 'byu', 'ビョ' => 'byo',
         'ピャ' => 'pya', 'ピュ' => 'pyu', 'ピョ' => 'pyo',
-        ' ' => ' ', '、' => ', ',
         'イィ' => 'yi', 'イェ' => 'ye',
         'ウァ' => 'wa', 'ウィ' => 'wi', 'ウゥ' => 'wu', 'ウェ' => 'we', 'ウォ' => 'wo',
         'ウュ' => 'wya',
@@ -122,8 +122,18 @@ class Hepburn implements RomanizationInterface
         'リェ' => 'rye',
         'ラ゜' => 'la', 'リ゜' => 'li', 'ル゜' => 'lu', 'レ゜' => 'le', 'ロ゜' => 'lo',
         'ヷ' => 'va', 'ヸ' => 'vi', 'ヹ' => 've', 'ヺ' => 'vo',
+        'ンア' => "n'a", 'ンイ' => "n'i", 'ンウ' => "n'u", 'ンエ' => "n'e", 'ンオ' => "n'o",
+        'ンヤ' => "n'ya", 'ンユ' => "n'yu", 'ンヨ' => "n'yo",
+    );
+
+    /**
+     * @var array Map Japanese punctuation marks to their equivalent in Latin
+     * alphabet.
+     */
+    protected $mapPunctuationMarks = array(
+        ' ' => ' ', '、' => ', ',
     );
 
     /**
      * Implements fromHiragana();
      *
@@ -132,7 +142,10 @@ class Hepburn implements RomanizationInterface
     public function fromHiragana($str)
     {
         $output = strtr($str, $this->mapHiragana);
+        $output = strtr($output, $this->mapPunctuationMarks);
         $output = $this->transliterateSokuon($output);
+        $output = $this->convertLongVowels($output);
+        $output = $this->convertParticles($output);
 
         return $output;
     }
@@ -145,6 +158,7 @@ public function fromHiragana($str)
     public function fromKatakana($str)
     {
         $output = strtr($str, $this->mapKatakana);
+        $output = strtr($output, $this->mapPunctuationMarks);
         $output = $this->transliterateSokuon($output, Transliterator::KATAKANA);
         $output = $this->transliterateChoonpu($output);
 
@@ -160,7 +174,6 @@ public function __toString(){
         return 'Hepburn romanization system (ヘボン式ローマ字)';
     }
 
-
     /**
      * Transliterate Sokuon (http://en.wikipedia.org/wiki/Sokuon) character into
      * its equivalent in latin alphabet.
@@ -181,7 +194,7 @@ protected function transliterateSokuon($str, $syllabary = Transliterator::HIRAGA
 
         // As per Hepburn system ch > tch
         // (http://en.wikipedia.org/wiki/Hepburn_romanization#Double_consonants)
-        return preg_replace('/cch/', 'tch', $output);
+        return str_replace('cch', 'tch', $output);
     }
 
     /**
@@ -204,4 +217,38 @@ protected function transliterateChoonpu($str)
 
         return preg_replace('/(.)' . Transliterator::CHOONPU . '/ue', '$macrons[\'${1}\']', $str);
     }
+
+    /**
+     * Post-processing transliteration to properly format long vowels.
+     * This is a minimalist implementation of Hepburn's rules. For a detailed
+     * explanation please refer to:
+     * - http://en.wikipedia.org/wiki/Hepburn_romanization#Long_vowels
+     *
+     * @param string $str Romanized string to post-process.
+     *
+     * @return string String with long vowels written with macrons.
+     */
+    protected function convertLongVowels($str)
+    {
+        $search = array('aa', 'uu', 'ee', 'oo', 'ou');
+        $replace = array('ā', 'ū', 'ē', 'ō', 'ō');
+
+        return str_replace($search, $replace, $str);
+    }
+
+    /**
+     * Post-processing transliteration to properly format particles.
+     * Only particles with a space on each side (' ha ', ' he ', ' wo ') match.
+     *
+     * @param string $str Romanized string to post-process.
+     *
+     * @return string String with the particles rewritten as wa, e and o.
+     */
+    protected function convertParticles($str)
+    {
+        $search = array(' ha ', ' he ', ' wo ');
+        $replace = array(' wa ', ' e ', ' o ');
+
+        return str_replace($search, $replace, $str);
+    }
 }
diff --git a/tests/transliterator.php b/tests/transliterator.php
index 1deb0ee..e911ec4 100644
--- a/tests/transliterator.php
+++ b/tests/transliterator.php
@@ -10,44 +10,60 @@
     'functions' => array(
         'toRomaji' => array(
             array(
-                'input' => array('まっちゃ'),
+                'input' => array('くるま'),
+                'expected' => 'kuruma',
+            ),
+            array(
+                'input' => array('がっこう', NULL, new MyClass()),
+                'expected' => 'がっこう',
+            ),
+            array(
+                'input' => array('がっこう', NULL, new Hepburn()),
+                'expected' => 'gakkō',
+            ),
+            array(
+                'input' => array('おばあさん', NULL, new Hepburn()),
+                'expected' => 'obāsan',
+            ),
+            array(
+                'input' => array('まっちゃ', NULL, new Hepburn()),
                 'expected' => 'matcha',
             ),
             array(
-                'input' => array('けっか'),
+                'input' => array('けっか', NULL, new Hepburn()),
                 'expected' => 'kekka',
             ),
             array(
-                'input' => array('マッチャ'),
+                'input' => array('マッチャ', NULL, new Hepburn()),
                 'expected' => 'matcha',
             ),
             array(
-                'input' => array('ケッカ'),
+                'input' => array('ケッカ', NULL, new Hepburn()),
                 'expected' => 'kekka',
             ),
             array(
-                'input' => array('タクシー'),
+                'input' => array('タクシー', NULL, new Hepburn()),
                 'expected' => 'takushī',
             ),
             array(
-                'input' => array('パーティー'),
+                'input' => array('パーティー', NULL, new Hepburn()),
                 'expected' => 'pātī',
             ),
             array(
-                'input' => array('サッカーをやる'),
+                'input' => array('サッカーをやる', NULL, new Hepburn()),
                 'expected' => 'sakkāwoyaru',
             ),
             array(
-                'input' => array('サッカー を やる'),
-                'expected' => 'sakkā wo yaru',
+                'input' => array('サッカー を やる', NULL, new Hepburn()),
+                'expected' => 'sakkā o yaru',
             ),
             array(
-                'input' => array('がっこう', NULL, new MyClass()),
-                'expected' => 'がっこう',
+                'input' => array('あんない', NULL, new Hepburn()),
+                'expected' => 'annai',
             ),
             array(
-                'input' => array('がっこう', NULL, new Hepburn()),
-                'expected' => 'gakkō',
+                'input' => array('きんえん', NULL, new Hepburn()),
+                'expected' => "kin'en",
             ),
             array(
                 'input' => array('がっこう', NULL, new Kunrei()),
@@ -59,7 +75,7 @@
             ),
             array(
                 'input' => array('サッカー を やる', Transliterator::HIRAGANA),
-                'expected' => 'サッカー wo yaru',
+                'expected' => 'サッカー o yaru',
             ),
             array(
                 'input' => array('サッカー を やる', Transliterator::KATAKANA),
@@ -71,6 +87,10 @@
                 'input' => array('gakkou', Transliterator::HIRAGANA),
                 'expected' => 'がっこう',
             ),
+            array(
+                'input' => array('obāsan', Transliterator::HIRAGANA),
+                'expected' => 'おばあさん',
+            ),
             array(
                 'input' => array('chakku', Transliterator::KATAKANA),
                 'expected' => 'チャック',
@@ -95,6 +115,10 @@
                 'input' => array('fôdo', Transliterator::KATAKANA),
                 'expected' => 'フォード',
             ),
+            array(
+                'input' => array('gakkō ni ikimasu', Transliterator::HIRAGANA),
+                'expected' => 'がっこう に いきます',
+            ),
         ),
     ),
 );