Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
Implements functionality to transliterate romaji into kana #11
Browse files Browse the repository at this point in the history
  • Loading branch information
mbilbille committed Sep 1, 2012
1 parent 8d667d0 commit 28298f6
Show file tree
Hide file tree
Showing 3 changed files with 257 additions and 5 deletions.
221 changes: 221 additions & 0 deletions src/JpnForPhp/Transliterator/Kana.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
<?php

/*
* This file is part of the JpnForPhp package.
*
* (c) Matthieu Bilbille
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace JpnForPhp\Transliterator;

use JpnForPhp\Analyzer\Analyzer;

/**
* Kana transliteration class
*/
class Kana
{
    /**
     * Romaji syllable => hiragana lookup table, consumed by strtr() (which
     * always prefers the longest matching key, so 'kya' wins over 'k' + 'ya').
     *
     * @var array
     */
    private $mapHiragana = array(
        'a' => 'あ', 'i' => 'い', 'u' => 'う', 'e' => 'え', 'o' => 'お',
        'ka' => 'か', 'ki' => 'き', 'ku' => 'く', 'ke' => 'け', 'ko' => 'こ',
        'sa' => 'さ', 'shi' => 'し', 'si' => 'し', 'su' => 'す', 'se' => 'せ', 'so' => 'そ',
        'ta' => 'た', 'chi' => 'ち', 'ti' => 'ち', 'tsu' => 'つ', 'tu' => 'つ', 'te' => 'て', 'to' => 'と',
        'na' => 'な', 'ni' => 'に', 'nu' => 'ぬ', 'ne' => 'ね', 'no' => 'の',
        'ha' => 'は', 'hi' => 'ひ', 'fu' => 'ふ', 'hu' => 'ふ', 'he' => 'へ', 'ho' => 'ほ',
        'ma' => 'ま', 'mi' => 'み', 'mu' => 'む', 'me' => 'め', 'mo' => 'も',
        'ra' => 'ら', 'ri' => 'り', 'ru' => 'る', 're' => 'れ', 'ro' => 'ろ',
        'ya' => 'や', 'yu' => 'ゆ', 'yo' => 'よ',
        'wa' => 'わ', 'wi' => 'ゐ', 'we' => 'ゑ', 'wo' => 'を',
        'n' => 'ん',
        'ga' => 'が', 'gi' => 'ぎ', 'gu' => 'ぐ', 'ge' => 'げ', 'go' => 'ご',
        'za' => 'ざ', 'ji' => 'じ', 'zi' => 'じ', 'zu' => 'ず', 'ze' => 'ぜ', 'zo' => 'ぞ',
        'da' => 'だ', 'di' => 'ぢ', 'dzu' => 'づ', 'du' => 'づ', 'de' => 'で', 'do' => 'ど',
        'ba' => 'ば', 'bi' => 'び', 'bu' => 'ぶ', 'be' => 'べ', 'bo' => 'ぼ',
        'pa' => 'ぱ', 'pi' => 'ぴ', 'pu' => 'ぷ', 'pe' => 'ぺ', 'po' => 'ぽ',
        'vu' => 'ゔ',
        'kya' => 'きゃ', 'kyu' => 'きゅ', 'kyo' => 'きょ',
        'sya' => 'しゃ', 'sha' => 'しゃ', 'syu' => 'しゅ', 'shu' => 'しゅ', 'syo' => 'しょ', 'sho' => 'しょ',
        'cya' => 'ちゃ', 'cha' => 'ちゃ', 'cyu' => 'ちゅ', 'chu' => 'ちゅ', 'cyo' => 'ちょ', 'cho' => 'ちょ',
        // 'nyu' previously pointed at the 'myu' kana by mistake.
        'nya' => 'にゃ', 'nyu' => 'にゅ', 'nyo' => 'にょ',
        'hya' => 'ひゃ', 'hyu' => 'ひゅ', 'hyo' => 'ひょ',
        'mya' => 'みゃ', 'myu' => 'みゅ', 'myo' => 'みょ',
        'rya' => 'りゃ', 'ryu' => 'りゅ', 'ryo' => 'りょ',
        'gya' => 'ぎゃ', 'gyu' => 'ぎゅ', 'gyo' => 'ぎょ',
        'ja' => 'じゃ', 'jya' => 'じゃ', 'ju' => 'じゅ', 'jyu' => 'じゅ', 'jo' => 'じょ', 'jyo' => 'じょ',
        'dja' => 'ぢゃ', 'dya' => 'ぢゃ', 'dju' => 'ぢゅ', 'dyu' => 'ぢゅ', 'djo' => 'ぢょ', 'dyo' => 'ぢょ',
        'bya' => 'びゃ', 'byu' => 'びゅ', 'byo' => 'びょ',
        'pya' => 'ぴゃ', 'pyu' => 'ぴゅ', 'pyo' => 'ぴょ',
    );

    /**
     * Romaji syllable => katakana lookup table, consumed by strtr().
     *
     * Besides the basic syllabary it carries the extended combinations used
     * to write foreign sounds. Commented-out entries are alternative
     * spellings that would clash with a key already mapped above them.
     *
     * @var array
     */
    private $mapKatakana = array(
        'a' => 'ア', 'i' => 'イ', 'u' => 'ウ', 'e' => 'エ', 'o' => 'オ',
        'ka' => 'カ', 'ki' => 'キ', 'ku' => 'ク', 'ke' => 'ケ', 'ko' => 'コ',
        'sa' => 'サ', 'shi' => 'シ', 'si' => 'シ', 'su' => 'ス', 'se' => 'セ', 'so' => 'ソ',
        'ta' => 'タ', 'chi' => 'チ', 'ti' => 'チ', 'tsu' => 'ツ', 'tu' => 'ツ', 'te' => 'テ', 'to' => 'ト',
        'na' => 'ナ', 'ni' => 'ニ', 'nu' => 'ヌ', 'ne' => 'ネ', 'no' => 'ノ',
        'ha' => 'ハ', 'hi' => 'ヒ', 'fu' => 'フ', 'hu' => 'フ', 'he' => 'ヘ', 'ho' => 'ホ',
        'ma' => 'マ', 'mi' => 'ミ', 'mu' => 'ム', 'me' => 'メ', 'mo' => 'モ',
        'ra' => 'ラ', 'ri' => 'リ', 'ru' => 'ル', 're' => 'レ', 'ro' => 'ロ',
        'ya' => 'ヤ', 'yu' => 'ユ', 'yo' => 'ヨ',
        'wa' => 'ワ', 'wi' => 'ヰ', 'we' => 'ヱ', 'wo' => 'ヲ',
        'n' => 'ン',
        'ga' => 'ガ', 'gi' => 'ギ', 'gu' => 'グ', 'ge' => 'ゲ', 'go' => 'ゴ',
        'za' => 'ザ', 'ji' => 'ジ', 'zi' => 'ジ', 'zu' => 'ズ', 'ze' => 'ゼ', 'zo' => 'ゾ',
        'da' => 'ダ', 'di' => 'ヂ', 'dzu' => 'ヅ', 'du' => 'ヅ', 'de' => 'デ', 'do' => 'ド',
        'ba' => 'バ', 'bi' => 'ビ', 'bu' => 'ブ', 'be' => 'ベ', 'bo' => 'ボ',
        'pa' => 'パ', 'pi' => 'ピ', 'pu' => 'プ', 'pe' => 'ペ', 'po' => 'ポ',
        'kya' => 'キャ', 'kyu' => 'キュ', 'kyo' => 'キョ',
        'sya' => 'シャ', 'sha' => 'シャ', 'syu' => 'シュ', 'shu' => 'シュ', 'syo' => 'ショ', 'sho' => 'ショ',
        'cya' => 'チャ', 'cha' => 'チャ', 'cyu' => 'チュ', 'chu' => 'チュ', 'cyo' => 'チョ', 'cho' => 'チョ',
        'nya' => 'ニャ', 'nyu' => 'ニュ', 'nyo' => 'ニョ',
        'hya' => 'ヒャ', 'hyu' => 'ヒュ', 'hyo' => 'ヒョ',
        'mya' => 'ミャ', 'myu' => 'ミュ', 'myo' => 'ミョ',
        'rya' => 'リャ', 'ryu' => 'リュ', 'ryo' => 'リョ',
        'gya' => 'ギャ', 'gyu' => 'ギュ', 'gyo' => 'ギョ',
        'ja' => 'ジャ', 'jya' => 'ジャ', 'ju' => 'ジュ', 'jyu' => 'ジュ', 'jo' => 'ジョ', 'jyo' => 'ジョ',
        'dja' => 'ヂャ', 'dya' => 'ヂャ', 'dju' => 'ヂュ', 'dyu' => 'ヂュ', 'djo' => 'ヂョ', 'dyo' => 'ヂョ',
        'bya' => 'ビャ', 'byu' => 'ビュ', 'byo' => 'ビョ',
        'pya' => 'ピャ', 'pyu' => 'ピュ', 'pyo' => 'ピョ',
        // Punctuation duplicated from $mapMarks; harmless because the marks
        // pass runs afterwards and produces the same result.
        ' ' => '　', ',' => '、', ', ' => '、',
        'yi' => 'イィ', 'ye' => 'イェ',
        //'wa' => 'ウァ', 'wi' => 'ウィ', 'wu' => 'ウゥ', 'we' => 'ウェ', 'wo' => 'ウォ',
        // NOTE(review): the key 'wya' paired with a small-yu kana looks like
        // a typo for 'wyu' - confirm before changing.
        'wya' => 'ウュ',
        'va' => 'ヴァ', 'vi' => 'ヴィ', 'vu' => 'ヴ', 've' => 'ヴェ', 'vo' => 'ヴォ',
        'vya' => 'ヴャ', 'vyu' => 'ヴュ', 'vye' => 'ヴィェ', 'vyo' => 'ヴョ',
        'kye' => 'キェ',
        'gye' => 'ギェ',
        'kwa' => 'クァ', 'kwi' => 'クィ', 'kwe' => 'クェ', 'kwo' => 'クォ',
        //'kwa' => 'クヮ',
        'gwa' => 'グァ', 'gwi' => 'グィ', 'gwe' => 'グェ', 'gwo' => 'グォ',
        //'gwa' => 'グヮ',
        'she' => 'シェ',
        'je' => 'ジェ',
        //'si' => 'スィ',
        //'zi' => 'ズィ',
        'che' => 'チェ',
        'tsa' => 'ツァ', 'tsi' => 'ツィ', 'tse' => 'ツェ', 'tso' => 'ツォ',
        'tsyu' => 'ツュ',
        //'ti' => 'ティ', 'tu' => 'テゥ',
        'tyu' => 'テュ',
        //'di' => 'ディ', 'du' => 'デゥ',
        //'dyu' => 'デュ',
        'nye' => 'ニェ',
        'hye' => 'ヒェ',
        'bye' => 'ビェ',
        'pye' => 'ピェ',
        'fa' => 'ファ', 'fi' => 'フィ', 'fe' => 'フェ', 'fo' => 'フォ',
        'fya' => 'フャ', 'fyu' => 'フュ', 'fye' => 'フィェ', 'fyo' => 'フョ',
        //'hu' => 'ホゥ',
        'mye' => 'ミェ',
        'rye' => 'リェ',
        'la' => 'ラ゜', 'li' => 'リ゜', 'lu' => 'ル゜', 'le' => 'レ゜', 'lo' => 'ロ゜',
        //'va' => 'ヷ', 'vi' => 'ヸ', 've' => 'ヹ', 'vo' => 'ヺ',
    );

    /**
     * Latin punctuation => Japanese punctuation, applied after the syllable
     * pass. '-' becomes the long-vowel mark emitted by
     * prepareTransliteration() for katakana.
     *
     * @var array
     */
    private $mapMarks = array(
        ' ' => '　', ',' => '、', ', ' => '、', '-' => 'ー',
    );

    /**
     * Transliterate a string from latin alphabet into hiragana.
     *
     * @param string $str The string to be converted.
     *
     * @return string Converted string into hiragana.
     */
    public function toHiragana($str)
    {
        $str = $this->prepareTransliteration($str, Transliterator::HIRAGANA);
        $str = $this->transliterateSokuon($str, Transliterator::HIRAGANA);
        $output = strtr($str, $this->mapHiragana);

        return strtr($output, $this->mapMarks);
    }

    /**
     * Transliterate a string from latin alphabet into katakana.
     *
     * @param string $str The string to be converted.
     *
     * @return string Converted string into katakana.
     */
    public function toKatakana($str)
    {
        $str = $this->prepareTransliteration($str, Transliterator::KATAKANA);
        $str = $this->transliterateSokuon($str, Transliterator::KATAKANA);
        $output = strtr($str, $this->mapKatakana);

        return strtr($output, $this->mapMarks);
    }

    /**
     * Prepare a string for its transliteration in kana.
     *
     * The string is lower-cased, then long vowels are normalised: for
     * hiragana, macron/circumflex vowels are expanded into two vowels; for
     * katakana, doubled and accented vowels become "vowel + '-'" so that the
     * marks pass can turn the dash into a long-vowel mark.
     *
     * @param string $str       String to be prepared.
     * @param mixed  $syllabary Target syllabary; compared against
     *                          Transliterator::HIRAGANA / ::KATAKANA.
     *
     * @return string Prepared string (only lower-cased when the syllabary is
     *                not recognised).
     */
    protected function prepareTransliteration($str, $syllabary)
    {
        $str = mb_strtolower($str, 'UTF-8');

        if ($syllabary === Transliterator::HIRAGANA) {
            $mapChars = array(
                'ā' => 'aa', 'ī' => 'ii', 'ū' => 'uu', 'ē' => 'ee', 'ō' => 'ou',
                'ô' => 'ou',
            );
        } elseif ($syllabary === Transliterator::KATAKANA) {
            $mapChars = array(
                'aa' => 'a-', 'ii' => 'i-', 'uu' => 'u-', 'ee' => 'e-', 'oo' => 'o-',
                'ā' => 'a-', 'ī' => 'i-', 'ū' => 'u-', 'ē' => 'e-', 'ō' => 'o-',
                'ô' => 'o-',
            );
        } else {
            return $str;
        }

        return strtr($str, $mapChars);
    }

    /**
     * Replace the first letter of each doubled consonant with the Sokuon
     * (http://en.wikipedia.org/wiki/Sokuon) character of the target
     * syllabary, e.g. "kitte" => "ki" + sokuon + "te".
     *
     * Vowels and 'n' are never converted. The Hepburn spelling "tch" is
     * handled as well: the 't' in front of "ch" becomes a sokuon.
     *
     * The string is walked character by character (UTF-8 aware) and the
     * result is accumulated, so every doubled consonant is converted, not
     * only the last one, and multibyte input cannot skew offsets.
     *
     * @param string $str       String to be transliterated.
     * @param mixed  $syllabary Syllabary to be used; Transliterator::HIRAGANA
     *                          selects the hiragana sokuon, anything else the
     *                          katakana one.
     *
     * @return string Transliterated string.
     */
    protected function transliterateSokuon($str, $syllabary)
    {
        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() yields false on malformed UTF-8; leave such input as is.
        if ($chars === false) {
            return $str;
        }

        $length = count($chars);

        // No need to go further.
        if ($length < 2) {
            return $str;
        }

        $sokuon = ($syllabary === Transliterator::HIRAGANA)
            ? Transliterator::SOKUON_HIRAGANA
            : Transliterator::SOKUON_KATAKANA;
        $skip = array('a', 'i', 'u', 'e', 'o', 'n');

        $new_str = '';
        $last = $length - 1;

        for ($i = 0; $i < $last; $i++) {
            $current = $chars[$i];
            $next = $chars[$i + 1];

            $doubled = ($current === $next);
            // Don't forget Hepburn special case: ch > tch
            $tch = ($current === 't' && $next === 'c'
                && isset($chars[$i + 2]) && $chars[$i + 2] === 'h');

            if (!in_array($current, $skip, true) && ($doubled || $tch)) {
                $new_str .= $sokuon;
            } else {
                $new_str .= $current;
            }
        }

        // The final character has no successor, so it is always kept.
        return $new_str . $chars[$last];
    }
}
21 changes: 16 additions & 5 deletions src/JpnForPhp/Transliterator/Transliterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,20 @@ class Transliterator
* verification to properly convert a given string into romaji.
*
* @param string $str The input string.
* @param TransliteratorInterface $transliterator A transliterator instance.
* @param RomanizationInterface $transliterator A romanization instance.
* @param integer $syllabary Force source syllabary.
*
* @return string Converted string into romaji.
*/
public static function toRomaji($str, $syllabary = NULL, TransliteratorInterface $transliterator = NULL)
public static function toRomaji($str, $syllabary = NULL, RomanizationInterface $transliterator = NULL)
{
$output = $str;

if(is_null($transliterator)){
// Set default system to Hepburn
$transliterator = new Hepburn();
}
elseif(!$transliterator instanceof TransliteratorInterface) {
elseif(!$transliterator instanceof RomanizationInterface) {
return $output;
}

Expand All @@ -57,7 +57,7 @@ public static function toRomaji($str, $syllabary = NULL, TransliteratorInterface
$output = $transliterator->fromKatakana($str);
}
} else {
// Rather than guessing the appropriate syllabary, process both.
// Rather than guessing the appropriate syllabary, process both.
$output = $transliterator->fromHiragana($str);
$output = $transliterator->fromKatakana($output);
}
Expand All @@ -67,6 +67,17 @@ public static function toRomaji($str, $syllabary = NULL, TransliteratorInterface


/**
 * Convert a romaji string into kana using the Kana transliterator.
 *
 * @param string $str       The input string.
 * @param mixed  $syllabary Target syllabary: self::HIRAGANA or
 *                          self::KATAKANA.
 *
 * @return string Converted string; the input is returned unchanged when
 *                the syllabary is neither of the two supported values.
 */
public static function toKana($str, $syllabary)
{
    // A leftover unconditional "return $str;" used to sit here and made the
    // whole dispatch below unreachable.
    $transliterator = new Kana();

    if ($syllabary === self::HIRAGANA) {
        return $transliterator->toHiragana($str);
    }

    if ($syllabary === self::KATAKANA) {
        return $transliterator->toKatakana($str);
    }

    return $str;
}
}
20 changes: 20 additions & 0 deletions tests/transliterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,26 @@
'input' => array('chakku', Transliterator::KATAKANA),
'expected' => 'チャック',
),
array(
'input' => array('sakkaa', Transliterator::KATAKANA),
'expected' => 'サッカー',
),
array(
'input' => array('sakkā', Transliterator::KATAKANA),
'expected' => 'サッカー',
),
array(
'input' => array('foodo', Transliterator::KATAKANA),
'expected' => 'フォード',
),
array(
'input' => array('fōdo', Transliterator::KATAKANA),
'expected' => 'フォード',
),
array(
'input' => array('fôdo', Transliterator::KATAKANA),
'expected' => 'フォード',
),
),
),
);
Expand Down

0 comments on commit 28298f6

Please sign in to comment.