Skip to content

Commit

Permalink
fix wrong pairing of kanji and phonetic notation (reported in #10)
Browse files Browse the repository at this point in the history
  • Loading branch information
hexenq committed Jun 5, 2017
1 parent 972492d commit 9d8ed24
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 27 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
<a name="0.1.5"></a>
## [0.1.5](https://github.com/hexenq/kuroshiro.js/compare/0.1.4...0.1.5) (2017-06-05)

### Bug Fixes

* fix wrong pairing of kanji and phonetic notation (reported in [#10](https://github.com/hexenq/kuroshiro.js/issues/10))

<a name="0.1.4"></a>
## [0.1.4](https://github.com/hexenq/kuroshiro.js/compare/0.1.3...0.1.4) (2017-05-25)

Expand Down
25 changes: 17 additions & 8 deletions dist/browser/kuroshiro.js
Original file line number Diff line number Diff line change
Expand Up @@ -9682,23 +9682,32 @@ var convert = function(str, options){
break;
case 1:
var pattern = '';
var isLastTokenKanji = false;
var subs = []; // recognize kanjis and group them
for(var c=0;c<tokens[i].surface_form.length;c++){
if(isKanji(tokens[i].surface_form[c])){
pattern += '(.*)';
if(!isLastTokenKanji){ // ignore successive kanji tokens (#10)
isLastTokenKanji = true;
pattern += '(.*?)';
subs.push(tokens[i].surface_form[c]);
}else{
subs[subs.length-1] += tokens[i].surface_form[c];
}
}else{
isLastTokenKanji = false;
subs.push(tokens[i].surface_form[c]);
pattern += wanakana.isKatakana(tokens[i].surface_form[c]) ? wanakana.toHiragana(tokens[i].surface_form[c]):tokens[i].surface_form[c];
}
}
var reg = new RegExp(pattern);
var reg = new RegExp('^' + pattern + '$');
var matches = reg.exec(tokens[i].reading);
if(matches){
var pickKanji = 0;
for(var c1=0;c1<tokens[i].surface_form.length;c1++){
if(isKanji(tokens[i].surface_form[c1])){
notations.push([tokens[i].surface_form[c1],1,matches[pickKanji+1]]);
pickKanji++;
var pickKanji = 1;
for(var c1=0;c1<subs.length;c1++){
if(isKanji(subs[c1][0])){
notations.push([subs[c1],1,matches[pickKanji++]]);
}else{
notations.push([tokens[i].surface_form[c1],2,wanakana.toHiragana(tokens[i].surface_form[c1])]);
notations.push([subs[c1],2,wanakana.toHiragana(subs[c1])]);
}
}
}else{
Expand Down
2 changes: 1 addition & 1 deletion dist/browser/kuroshiro.min.js

Large diffs are not rendered by default.

27 changes: 18 additions & 9 deletions dist/node/kuroshiro.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "kuroshiro",
"version": "0.1.4",
"version": "0.1.5",
"description": "kuroshiro.js is a Japanese language utility mainly for converting Kanji-mixed sentences to Hiragana, Katakana or Romaji, with furigana and okurigana modes supported.",
"main": "./dist/node/kuroshiro.js",
"scripts": {
Expand Down
25 changes: 17 additions & 8 deletions src/kuroshiro.js
Original file line number Diff line number Diff line change
Expand Up @@ -193,23 +193,32 @@ var convert = function(str, options){
break;
case 1:
var pattern = '';
var isLastTokenKanji = false;
var subs = []; // recognize kanjis and group them
for(var c=0;c<tokens[i].surface_form.length;c++){
if(isKanji(tokens[i].surface_form[c])){
pattern += '(.*)';
if(!isLastTokenKanji){ // ignore successive kanji tokens (#10)
isLastTokenKanji = true;
pattern += '(.*?)';
subs.push(tokens[i].surface_form[c]);
}else{
subs[subs.length-1] += tokens[i].surface_form[c];
}
}else{
isLastTokenKanji = false;
subs.push(tokens[i].surface_form[c]);
pattern += wanakana.isKatakana(tokens[i].surface_form[c]) ? wanakana.toHiragana(tokens[i].surface_form[c]):tokens[i].surface_form[c];
}
}
var reg = new RegExp(pattern);
var reg = new RegExp('^' + pattern + '$');
var matches = reg.exec(tokens[i].reading);
if(matches){
var pickKanji = 0;
for(var c1=0;c1<tokens[i].surface_form.length;c1++){
if(isKanji(tokens[i].surface_form[c1])){
notations.push([tokens[i].surface_form[c1],1,matches[pickKanji+1]]);
pickKanji++;
var pickKanji = 1;
for(var c1=0;c1<subs.length;c1++){
if(isKanji(subs[c1][0])){
notations.push([subs[c1],1,matches[pickKanji++]]);
}else{
notations.push([tokens[i].surface_form[c1],2,wanakana.toHiragana(tokens[i].surface_form[c1])]);
notations.push([subs[c1],2,wanakana.toHiragana(subs[c1])]);
}
}
}else{
Expand Down
6 changes: 6 additions & 0 deletions test/kuroshiroTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ describe("kuroshiro.js Test", function () {
const EXAMPLE_TEXT = "感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!";
const EXAMPLE_TEXT2 = "ブラウン管への愛が足りねぇな";
const EXAMPLE_TEXT3 = "関ヶ原の戦い";
const EXAMPLE_TEXT4 = "綺麗な花。面白い映画。面白かったです。";

before(function(done){
kuroshiro.init(done);
Expand Down Expand Up @@ -78,6 +79,11 @@ describe("kuroshiro.js Test", function () {
var result = kuroshiro.convert(ori,{mode:'okurigana', to:'hiragana'});
expect(result).to.eql('関ヶ原(せきがはら)の戦(たたか)い');
});
// Regression case added with the fix reported in #10: converts EXAMPLE_TEXT4 in
// okurigana mode to hiragana and expects each run of successive kanji
// (綺麗, 面白, 映画) to carry a single grouped reading, with the trailing kana
// left outside the parentheses.
it("Kanji to Hiragana with okurigana(4)", function () {
var ori = EXAMPLE_TEXT4;
var result = kuroshiro.convert(ori,{mode:'okurigana', to:'hiragana'});
// Deep-equality check against the exact annotated string.
expect(result).to.eql('綺麗(きれい)な花(はな)。面白(おもしろ)い映画(えいが)。面白(おもしろ)かったです。');
});
it("Kanji to Katakana with okurigana", function () {
var ori = EXAMPLE_TEXT;
var result = kuroshiro.convert(ori,{mode:'okurigana', to:'katakana'});
Expand Down

0 comments on commit 9d8ed24

Please sign in to comment.