diff --git a/src/JpnForPhp/Transliterator/Kana.php b/src/JpnForPhp/Transliterator/Kana.php new file mode 100644 index 0000000..1cf005a --- /dev/null +++ b/src/JpnForPhp/Transliterator/Kana.php @@ -0,0 +1,221 @@ + 'あ', 'i' => 'い', 'u' => 'う', 'e' => 'え', 'o' => 'お', + 'ka' => 'か', 'ki' => 'き', 'ku' => 'く', 'ke' => 'け', 'ko' => 'こ', + 'sa' => 'さ', 'shi' => 'し', 'si' => 'し', 'su' => 'す', 'se' => 'せ', 'so' => 'そ', + 'ta' => 'た', 'chi' => 'ち', 'ti' => 'ち', 'tsu' => 'つ', 'tu' => 'つ', 'te' => 'て', 'to' => 'と', + 'na' => 'な', 'ni' => 'に', 'nu' => 'ぬ', 'ne' => 'ね', 'no' => 'の', + 'ha' => 'は', 'hi' => 'ひ', 'fu' => 'ふ', 'hu' => 'ふ', 'he' => 'へ', 'ho' => 'ほ', + 'ma' => 'ま', 'mi' => 'み', 'mu' => 'む', 'me' => 'め', 'mo' => 'も', + 'ra' => 'ら', 'ri' => 'り', 'ru' => 'る', 're' => 'れ', 'ro' => 'ろ', + 'ya' => 'や', 'yu' => 'ゆ', 'yo' => 'よ', + 'wa' => 'わ', 'wi' => 'ゐ', 'we' => 'ゑ', 'wo' => 'を', + 'n' => 'ん', + 'ga' => 'が', 'gi' => 'ぎ', 'gu' => 'ぐ', 'ge' => 'げ', 'go' => 'ご', + 'za' => 'ざ', 'ji' => 'じ', 'zi' => 'じ', 'zu' => 'ず', 'ze' => 'ぜ', 'zo' => 'ぞ', + 'da' => 'だ', 'di' => 'ぢ', 'dzu' => 'づ', 'du' => 'づ', 'de' => 'で', 'do' => 'ど', + 'ba' => 'ば', 'bi' => 'び', 'bu' => 'ぶ', 'be' => 'べ', 'bo' => 'ぼ', + 'pa' => 'ぱ', 'pi' => 'ぴ', 'pu' => 'ぷ', 'pe' => 'ぺ', 'po' => 'ぽ', + 'vu' => 'ゔ', + 'kya' => 'きゃ', 'kyu' => 'きゅ', 'kyo' => 'きょ', + 'sya' => 'しゃ', 'sha' => 'しゃ', 'syu' => 'しゅ', 'shu' => 'しゅ', 'syo' => 'しょ', 'sho' => 'しょ', + 'cya' => 'ちゃ', 'cha' => 'ちゃ', 'cyu' => 'ちゅ', 'chu' => 'ちゅ', 'cyo' => 'ちょ', 'cho' => 'ちょ', + 'nya' => 'にゃ', 'nyu' => 'みゅ', 'nyo' => 'にょ', + 'hya' => 'ひゃ', 'hyu' => 'ひゅ', 'hyo' => 'ひょ', + 'mya' => 'みゃ', 'myu' => 'みゅ', 'myo' => 'みょ', + 'rya' => 'りゃ', 'ryu' => 'りゅ', 'ryo' => 'りょ', + 'gya' => 'ぎゃ', 'gyu' => 'ぎゅ', 'gyo' => 'ぎょ', + 'ja' => 'じゃ', 'jya' => 'じゃ', 'ju' => 'じゅ', 'jyu' => 'じゅ', 'jo' => 'じょ', 'jyo' => 'じょ', + 'dja' => 'ぢゃ', 'dya' => 'ぢゃ', 'dju' => 'ぢゅ', 'dyu' => 'ぢゅ', 'djo' => 'ぢょ', 'dyo' => 'ぢょ', + 'bya' => 'びゃ', 'byu' => 'びゅ', 'byo' => 'びょ', + 'pya' => 'ぴゃ', 'pyu' => 'ぴゅ', 'pyo' => 'ぴょ', + ); + private $mapKatakana = array( + 'a' => 'ア', 'i' => 'イ', 'u' => 'ウ', 'e' => 'エ', 'o' => 'オ', + 'ka' => 'カ', 'ki' => 'キ', 'ku' => 'ク', 'ke' => 'ケ', 'ko' => 'コ', + 'sa' => 'サ', 'shi' => 'シ', 'si' => 'シ', 'su' => 'ス', 'se' => 'セ', 'so' => 'ソ', + 'ta' => 'タ', 'chi' => 'チ', 'ti' => 'チ', 'tsu' => 'ツ', 'tu' => 'ツ', 'te' => 'テ', 'to' => 'ト', + 'na' => 'ナ', 'ni' => 'ニ', 'nu' => 'ヌ', 'ne' => 'ネ', 'no' => 'ノ', + 'ha' => 'ハ', 'hi' => 'ヒ', 'fu' => 'フ', 'hu' => 'フ', 'he' => 'ヘ', 'ho' => 'ホ', + 'ma' => 'マ', 'mi' => 'ミ', 'mu' => 'ム', 'me' => 'メ', 'mo' => 'モ', + 'ra' => 'ラ', 'ri' => 'リ', 'ru' => 'ル', 're' => 'レ', 'ro' => 'ロ', + 'ya' => 'ヤ', 'yu' => 'ユ', 'yo' => 'ヨ', + 'wa' => 'ワ', 'wi' => 'ヰ', 'we' => 'ヱ', 'wo' => 'ヲ', + 'n' => 'ン', + 'ga' => 'ガ', 'gi' => 'ギ', 'gu' => 'グ', 'ge' => 'ゲ', 'go' => 'ゴ', + 'za' => 'ザ', 'ji' => 'ジ', 'zi' => 'ジ', 'zu' => 'ズ', 'ze' => 'ゼ', 'zo' => 'ゾ', + 'da' => 'ダ', 'di' => 'ヂ', 'dzu' => 'ヅ', 'du' => 'ヅ', 'de' => 'デ', 'do' => 'ド', + 'ba' => 'バ', 'bi' => 'ビ', 'bu' => 'ブ', 'be' => 'ベ', 'bo' => 'ボ', + 'pa' => 'パ', 'pi' => 'ピ', 'pu' => 'プ', 'pe' => 'ペ', 'po' => 'ポ', + 'vu' => 'ヴ', + 'kya' => 'キャ', 'kyu' => 'キュ', 'kyo' => 'キョ', + 'sya' => 'シャ', 'sha' => 'シャ', 'syu' => 'シュ', 'shu' => 'シュ', 'syo' => 'ショ', 'sho' => 'ショ', + 'cya' => 'チャ', 'cha' => 'チャ', 'cyu' => 'チュ', 'chu' => 'チュ', 'cyo' => 'チョ', 'cho' => 'チョ', + 'nya' => 'ニャ', 'nyu' => 'ニュ', 'nyo' => 'ニョ', + 'hya' => 'ヒャ', 'hyu' => 'ヒュ', 'hyo' => 'ヒョ', + 'mya' => 'ミャ', 'myu' => 'ミュ', 'myo' => 'ミョ', + 'rya' => 'リャ', 'ryu' => 'リュ', 'ryo' => 'リョ', + 'gya' => 'ギャ', 'gyu' => 'ギュ', 'gyo' => 'ギョ', + 'ja' => 'ジャ', 'jya' => 'ジャ', 'ju' => 'ジュ', 'jyu' => 'ジュ', 'jo' => 'ジョ', 'jyo' => 'ジョ', + 'dja' => 'ヂャ', 'dya' => 'ヂャ', 'dju' => 'ヂュ', 'dyu' => 'ヂュ', 'djo' => 'ヂョ', 'dyo' => 'ヂョ', + 'bya' => 'ビャ', 'byu' => 'ビュ', 'byo' => 'ビョ', + 'pya' => 'ピャ', 'pyu' => 'ピュ', 'pyo' => 'ピョ', + ' ' => ' ', ',' => '、', ', ' => '、', + 'yi' => 'イィ', 'ye' => 'イェ', + //'wa' => 'ウァ', 'wi' => 'ウィ', 'wu' => 'ウゥ', 'we' => 'ウェ', 'wo' => 'ウォ', + 'wya' => 'ウュ', + 'va' => 'ヴァ', 'vi' => 'ヴィ', 'vu' => 'ヴ', 've' => 'ヴェ', 'vo' => 'ヴォ', + 'vya' => 'ヴャ', 'vyu' => 'ヴュ', 'vye' => 'ヴィェ', 'vyo' => 'ヴョ', + 'kye' => 'キェ', + 'gye' => 'ギェ', + 'kwa' => 'クァ', 'kwi' => 'クィ', 'kwe' => 'クェ', 'kwo' => 'クォ', + //'kwa' => 'クヮ', + 'gwa' => 'グァ', 'gwi' => 'グィ', 'gwe' => 'グェ', 'gwo' => 'グォ', + //'gwa' => 'グヮ', + 'she' => 'シェ', + 'je' => 'ジェ', + //'si' => 'スィ', + //'zi' => 'ズィ', + 'che' => 'チェ', + 'tsa' => 'ツァ', 'tsi' => 'ツィ', 'tse' => 'ツェ', 'tso' => 'ツォ', + 'tsyu' => 'ツュ', + //'ti' => 'ティ', 'tu' => 'テゥ', + 'tyu' => 'テュ', + //'di' => 'ディ', 'du' => 'デゥ', + //'dyu' => 'デュ', + 'nye' => 'ニェ', + 'hye' => 'ヒェ', + 'bye' => 'ビェ', + 'pye' => 'ピェ', + 'fa' => 'ファ', 'fi' => 'フィ', 'fe' => 'フェ', 'fo' => 'フォ', + 'fya' => 'フャ', 'fyu' => 'フュ', 'fye' => 'フィェ', 'fyo' => 'フョ', + //'hu' => 'ホゥ', + 'mye' => 'ミェ', + 'rye' => 'リェ', + 'la' => 'ラ゜', 'li' => 'リ゜', 'lu' => 'ル゜', 'le' => 'レ゜', 'lo' => 'ロ゜', + //'va' => 'ヷ', 'vi' => 'ヸ', 've' => 'ヹ', 'vo' => 'ヺ', + ); + private $mapMarks = array( + ' ' => ' ', ',' => '、', ', ' => '、', '-' => 'ー', + ); + + /** + * Transliterate a string from latin alphabet into hiragana. + * + * @param string $str The string to be converted. + * + * @return string Converted string into hiragana. + */ + public function toHiragana($str){ + $str = $this->prepareTransliteration($str, Transliterator::HIRAGANA); + $str = $this->transliterateSokuon($str, Transliterator::HIRAGANA); + $output = strtr($str, $this->mapHiragana); + $output = strtr($output, $this->mapMarks); + return $output; + } + + /** + * Transliterate a string from latin alphabet into katakana. + * + * @param string $str The string to be converted. + * + * @return string Converted string into katakana. + */ + public function toKatakana($str){ + $str = $this->prepareTransliteration($str,Transliterator::KATAKANA); + $str = $this->transliterateSokuon($str, Transliterator::KATAKANA); + $output = strtr($str, $this->mapKatakana); + $output = strtr($output, $this->mapMarks); + return $output; + } + + /** + * Prepare a string for its transliteration in kana. + * + * @param string $str String to be prepared. + * + * @return string Prepared string. + */ + protected function prepareTransliteration($str, $syllabary) + { + $str = mb_strtolower($str, 'UTF-8'); + $mapChars = array(); + if($syllabary === Transliterator::HIRAGANA){ + $mapChars = array( + 'ā' => 'aa', 'ī' => 'ii', 'ū' => 'uu', 'ē' => 'ee', 'ō' => 'ou', + 'ô' => 'ou', + ); + } + elseif($syllabary === Transliterator::KATAKANA){ + $mapChars = array( + 'aa' => 'a-', 'ii' => 'i-', 'uu' => 'u-', 'ee' => 'e-', 'oo' => 'o-', + 'ā' => 'a-', 'ī' => 'i-', 'ū' => 'u-', 'ē' => 'e-', 'ō' => 'o-', + 'ô' => 'o-', + ); + } + else{ + return $str; + } + + $prepared_s = strtr($str, $mapChars); + + return $prepared_s; + } + + + /** + * Transliterate Sokuon (http://en.wikipedia.org/wiki/Sokuon) character into + * its equivalent in latin alphabet. + * + * @param string $str String to be transliterated. + * @param string $syllabary Syllabary to be used ; either Hiragana or + * Katakana. + * + * @return string Transliterated string. + */ + protected function transliterateSokuon($str, $syllabary) + { + $new_str = $str; + $length = Analyzer::length($str); + + //No need to go further. + if ($length < 2) { + return $new_str; + } + + $sokuon = ($syllabary === Transliterator::HIRAGANA) ? Transliterator::SOKUON_HIRAGANA : Transliterator::SOKUON_KATAKANA; + $skip = array('a', 'i', 'u', 'e', 'o', 'n'); + + for ($i = 1; $i < $length; $i++) { + $prev_char = substr($str, $i - 1, 1); + if (!in_array($prev_char, $skip)) { + // Don't forget Hepburn special case: ch > tch + if ($prev_char === substr($str, $i, 1) || ($prev_char === 't' && substr($str, $i, 2) === 'ch')) { + $new_str = substr_replace($str, $sokuon, $i - 1, 1); + } + } + } + + return $new_str; + } + +} diff --git a/src/JpnForPhp/Transliterator/Transliterator.php b/src/JpnForPhp/Transliterator/Transliterator.php index 984d78a..1441a68 100644 --- a/src/JpnForPhp/Transliterator/Transliterator.php +++ b/src/JpnForPhp/Transliterator/Transliterator.php @@ -32,12 +32,12 @@ class Transliterator * verification to properly convert a given string into romaji. * * @param string $str The input string. - * @param TransliteratorInterface $transliterator A transliterator instance. + * @param RomanizationInterface $transliterator A romanization instance. * @param integer $syllabary Force source syllabary. * * @return string Converted string into romaji. */ - public static function toRomaji($str, $syllabary = NULL, TransliteratorInterface $transliterator = NULL) + public static function toRomaji($str, $syllabary = NULL, RomanizationInterface $transliterator = NULL) { $output = $str; @@ -45,7 +45,7 @@ public static function toRomaji($str, $syllabary = NULL, TransliteratorInterface // Set default system to Hepburn $transliterator = new Hepburn(); } - elseif(!$transliterator instanceof TransliteratorInterface) { + elseif(!$transliterator instanceof RomanizationInterface) { return $output; } @@ -57,7 +57,7 @@ public static function toRomaji($str, $syllabary = NULL, TransliteratorInterface $output = $transliterator->fromKatakana($str); } } else { - // Rather than guessing the appropriate syllabary, process both. + // Rather than guessing the appropriate syllabary, process both. $output = $transliterator->fromHiragana($str); $output = $transliterator->fromKatakana($output); } @@ -67,6 +67,17 @@ public static function toRomaji($str, $syllabary = NULL, TransliteratorInterface public static function toKana($str, $syllabary){ - return $str; + + $output = $str; + $transliterator = new Kana(); + + if($syllabary === self::HIRAGANA){ + $output = $transliterator->toHiragana($str); + } + elseif($syllabary === self::KATAKANA){ + $output = $transliterator->toKatakana($str); + } + + return $output; } } diff --git a/tests/transliterator.php b/tests/transliterator.php index a51349c..1deb0ee 100644 --- a/tests/transliterator.php +++ b/tests/transliterator.php @@ -75,6 +75,26 @@ 'input' => array('chakku', Transliterator::KATAKANA), 'expected' => 'チャック', ), + array( + 'input' => array('sakkaa', Transliterator::KATAKANA), + 'expected' => 'サッカー', + ), + array( + 'input' => array('sakkā', Transliterator::KATAKANA), + 'expected' => 'サッカー', + ), + array( + 'input' => array('foodo', Transliterator::KATAKANA), + 'expected' => 'フォード', + ), + array( + 'input' => array('fōdo', Transliterator::KATAKANA), + 'expected' => 'フォード', + ), + array( + 'input' => array('fôdo', Transliterator::KATAKANA), + 'expected' => 'フォード', + ), ), ), );