Skip to content

Commit

Permalink
Add five double pinyin formats and add docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Chaoses-Ib committed Jan 27, 2022
1 parent ae93458 commit 6926a40
Show file tree
Hide file tree
Showing 7 changed files with 333 additions and 56 deletions.
240 changes: 240 additions & 0 deletions PinyinLib/DoublePinyin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
#include "pch.h"
#include "Pinyin.hpp"

#define LITERAL(s) IB_PINYIN_LITERAL(s)

namespace pinyin{
// Produces the double-pinyin spelling of this syllable for the "abc" scheme
// by handing the scheme's three lookup tables to convert().
String Pinyin::to_double_pinyin_abc() const
{
    using Table = std::unordered_map<StringView, StringView>;
#define DP(from, to) { LITERAL(#from), LITERAL(#to) }

    // Bare-vowel syllables (keys) and the two-letter code each one maps to.
    static Table whole_syllables{
        DP(e, oe), DP(o, oo), DP(a, oa),
        DP(ei, oq), DP(ai, ol), DP(ou, ob), DP(ao, ok),
        DP(en, of), DP(an, oj),
        DP(eng, og), DP(ang, oh)
    };

    // Only the two-letter initials are listed; each maps to a single key.
    static Table initials{
        DP(zh, a),
        DP(ch, e),
        DP(sh, v),
    };

    // Finals (keys) and the single key that stands for each of them.
    // "er" is the one entry whose value is two letters long.
    static Table finals{
        DP(i, i), DP(u, u), DP(v, v),
        DP(e, e), DP(ie, x),
        DP(o, o), DP(uo, o),
        DP(ue, m), DP(ve, m),
        DP(a, a), DP(ia, d), DP(ua, d),
        DP(ei, q), DP(ui, m),
        DP(ai, l), DP(uai, c),
        DP(ou, b), DP(iu, r),
        DP(ao, k), DP(iao, z),
        DP(in, c), DP(un, n), DP(vn, n),
        DP(en, f),
        DP(an, j), DP(ian, w), DP(uan, p), DP(van, p),
        DP(ing, y),
        DP(ong, s), DP(iong, s),
        DP(eng, g),
        DP(ang, h), DP(iang, t), DP(uang, t),
        DP(er, or)
    };

#undef DP
    return convert(whole_syllables, initials, finals);
}

// Produces the double-pinyin spelling of this syllable for the "jiajia"
// (Pinyin Jiajia) scheme by handing the scheme's three lookup tables to
// convert().
//
// Returns whatever convert() yields for these tables.
String Pinyin::to_double_pinyin_jiajia() const
{
#define ITEM(k, v) { LITERAL(#k), LITERAL(#v) }
    // Bare-vowel syllables (keys) and the two-letter code each one maps to.
    static std::unordered_map<StringView, StringView> pinyin_map{
        ITEM(e, ee), ITEM(o, oo),
        ITEM(a, aa),
        ITEM(ei, ew),
        ITEM(ai, as),
        ITEM(ou, op),
        ITEM(ao, ad),
        ITEM(en, er),
        ITEM(an, af),
        ITEM(eng, et),
        ITEM(ang, ag)
    };
    // Only the two-letter initials are listed; each maps to a single key.
    static std::unordered_map<StringView, StringView> initial_map{
        ITEM(zh, v), ITEM(ch, u), ITEM(sh, i),
    };
    // Finals (keys) and the key that stands for each of them.
    static std::unordered_map<StringView, StringView> final_map{
        ITEM(i, i), ITEM(u, u), ITEM(v, v),
        // "ve" (üe) shares the x key with "ue" in the Jiajia layout; it was
        // previously mapped to t, which is the key for "eng".
        ITEM(e, e), ITEM(ie, m), ITEM(o, o), ITEM(uo, o), ITEM(ue, x), ITEM(ve, x),
        ITEM(a, a), ITEM(ia, b), ITEM(ua, b),
        ITEM(ei, w), ITEM(ui, v),
        ITEM(ai, s), ITEM(uai, x),
        ITEM(ou, p), ITEM(iu, n),
        ITEM(ao, d), ITEM(iao, k),
        ITEM(in, l), ITEM(un, z), ITEM(vn, z),
        ITEM(en, r),
        ITEM(an, f), ITEM(ian, j), ITEM(uan, c), ITEM(van, c),
        ITEM(ing, q),
        ITEM(ong, y), ITEM(iong, y),
        ITEM(eng, t),
        ITEM(ang, g), ITEM(iang, h), ITEM(uang, h),
        ITEM(er, eq)
    };
#undef ITEM
    return convert(pinyin_map, initial_map, final_map);
}

// Produces the double-pinyin spelling of this syllable for the "microsoft"
// scheme by handing the scheme's three lookup tables to convert().
String Pinyin::to_double_pinyin_microsoft() const
{
    using Table = std::unordered_map<StringView, StringView>;
#define DP(from, to) { LITERAL(#from), LITERAL(#to) }

    // Bare-vowel syllables (keys) and the two-letter code each one maps to.
    static Table whole_syllables{
        DP(e, oe), DP(o, oo), DP(a, oa),
        DP(ei, oz), DP(ai, ol), DP(ou, ob), DP(ao, ok),
        DP(en, of), DP(an, oj),
        DP(eng, og), DP(ang, oh)
    };

    // Only the two-letter initials are listed; each maps to a single key.
    static Table initials{
        DP(zh, v),
        DP(ch, i),
        DP(sh, u),
    };

    // Finals (keys) and the key that stands for each of them.
    // Note that "ue" and "ve" map to different keys in this table, and that
    // "ing" maps to the semicolon key.
    static Table finals{
        DP(i, i), DP(u, u), DP(v, y),
        DP(e, e), DP(ie, x),
        DP(o, o), DP(uo, o),
        DP(ue, t), DP(ve, v),
        DP(a, a), DP(ia, w), DP(ua, w),
        DP(ei, z), DP(ui, v),
        DP(ai, l), DP(uai, y),
        DP(ou, b), DP(iu, q),
        DP(ao, k), DP(iao, c),
        DP(in, n), DP(un, p), DP(vn, p),
        DP(en, f),
        DP(an, j), DP(ian, m), DP(uan, r), DP(van, r),
        DP(ing, ;),
        DP(ong, s), DP(iong, s),
        DP(eng, g),
        DP(ang, h), DP(iang, d), DP(uang, d),
        DP(er, or)
    };

#undef DP
    return convert(whole_syllables, initials, finals);
}

// Produces the double-pinyin spelling of this syllable for the "thunisoft"
// (Ziguang) scheme by handing the scheme's three lookup tables to convert().
//
// Returns whatever convert() yields for these tables.
String Pinyin::to_double_pinyin_thunisoft() const
{
#define ITEM(k, v) { LITERAL(#k), LITERAL(#v) }
    // Bare-vowel syllables (keys) and the two-letter code each one maps to.
    static std::unordered_map<StringView, StringView> pinyin_map{
        ITEM(e, oe), ITEM(o, oo),
        ITEM(a, oa),
        ITEM(ei, ok),
        ITEM(ai, op),
        ITEM(ou, oz),
        ITEM(ao, oq),
        ITEM(en, ow),
        ITEM(an, or),
        ITEM(eng, ot),
        ITEM(ang, os)
    };
    // Only the two-letter initials are listed; each maps to a single key.
    static std::unordered_map<StringView, StringView> initial_map{
        ITEM(zh, u), ITEM(ch, a), ITEM(sh, i),
    };
    // Finals (keys) and the key that stands for each of them.
    static std::unordered_map<StringView, StringView> final_map{
        ITEM(i, i), ITEM(u, u), ITEM(v, v),
        ITEM(e, e), ITEM(ie, d), ITEM(o, o), ITEM(uo, o), ITEM(ue, n), ITEM(ve, n),
        ITEM(a, a), ITEM(ia, x), ITEM(ua, x),
        ITEM(ei, k), ITEM(ui, n),
        ITEM(ai, p), ITEM(uai, y),
        ITEM(ou, z), ITEM(iu, j),
        ITEM(ao, q), ITEM(iao, b),
        // "vn" (ün) shares the m key with "un"; it was previously mapped to
        // y, which is the key for "in".
        ITEM(in, y), ITEM(un, m), ITEM(vn, m),
        ITEM(en, w),
        ITEM(an, r), ITEM(ian, f), ITEM(uan, l), ITEM(van, l),
        ITEM(ing, ;),
        ITEM(ong, h), ITEM(iong, h),
        ITEM(eng, t),
        ITEM(ang, s), ITEM(iang, g), ITEM(uang, g),
        ITEM(er, oj)
    };
#undef ITEM
    return convert(pinyin_map, initial_map, final_map);
}

// Produces the double-pinyin spelling of this syllable for the "xiaohe"
// scheme by handing the scheme's three lookup tables to convert().
String Pinyin::to_double_pinyin_xiaohe() const
{
    using Table = std::unordered_map<StringView, StringView>;
#define DP(from, to) { LITERAL(#from), LITERAL(#to) }

    // Bare-vowel syllables (keys) and the two-letter code each one maps to.
    // Two-letter syllables map to themselves in this table.
    static Table whole_syllables{
        DP(e, ee), DP(o, oo), DP(a, aa),
        DP(ei, ei), DP(ai, ai), DP(ou, ou), DP(ao, ao),
        DP(en, en), DP(an, an),
        DP(eng, eg), DP(ang, ah)
    };

    // Only the two-letter initials are listed; each maps to a single key.
    static Table initials{
        DP(zh, v),
        DP(ch, i),
        DP(sh, u),
    };

    // Finals (keys) and the key that stands for each of them.
    static Table finals{
        DP(i, i), DP(u, u), DP(v, v),
        DP(e, e), DP(ie, p),
        DP(o, o), DP(uo, o),
        DP(ue, t), DP(ve, t),
        DP(a, a), DP(ia, x), DP(ua, x),
        DP(ei, w), DP(ui, v),
        DP(ai, d), DP(uai, k),
        DP(ou, z), DP(iu, q),
        DP(ao, c), DP(iao, n),
        DP(in, b), DP(un, y), DP(vn, y),
        DP(en, f),
        DP(an, j), DP(ian, m), DP(uan, r), DP(van, r),
        DP(ing, k),
        DP(ong, s), DP(iong, s),
        DP(eng, g),
        DP(ang, h), DP(iang, l), DP(uang, l),
        DP(er, er)
    };

#undef DP
    return convert(whole_syllables, initials, finals);
}

// Produces the double-pinyin spelling of this syllable for the "zrm"
// (Ziranma) scheme by handing the scheme's three lookup tables to convert().
//
// Returns whatever convert() yields for these tables.
String Pinyin::to_double_pinyin_zrm() const
{
#define ITEM(k, v) { LITERAL(#k), LITERAL(#v) }
    // Bare-vowel syllables (keys) and the two-letter code each one maps to.
    // Two-letter syllables map to themselves in this table.
    static std::unordered_map<StringView, StringView> pinyin_map{
        ITEM(e, ee), ITEM(o, oo),
        ITEM(a, aa),
        ITEM(ei, ei),
        ITEM(ai, ai),
        ITEM(ou, ou),
        ITEM(ao, ao),
        ITEM(en, en),
        ITEM(an, an),
        ITEM(eng, eg),
        ITEM(ang, ah)
    };
    // Only the two-letter initials are listed; each maps to a single key.
    static std::unordered_map<StringView, StringView> initial_map{
        ITEM(zh, v), ITEM(ch, i), ITEM(sh, u),
    };
    // Finals (keys) and the key that stands for each of them.
    static std::unordered_map<StringView, StringView> final_map{
        ITEM(i, i), ITEM(u, u), ITEM(v, v),
        ITEM(e, e), ITEM(ie, x), ITEM(o, o), ITEM(uo, o), ITEM(ue, t), ITEM(ve, t),
        ITEM(a, a), ITEM(ia, w), ITEM(ua, w),
        ITEM(ei, z), ITEM(ui, v),
        ITEM(ai, l), ITEM(uai, y),
        ITEM(ou, b), ITEM(iu, q),
        ITEM(ao, k), ITEM(iao, c),
        ITEM(in, n), ITEM(un, p), ITEM(vn, p),
        ITEM(en, f),
        ITEM(an, j), ITEM(ian, m), ITEM(uan, r), ITEM(van, r),
        // Ziranma types "ing" on y (shared with "uai"); the semicolon key
        // used here before belongs to the Microsoft layout.
        ITEM(ing, y),
        ITEM(ong, s), ITEM(iong, s),
        ITEM(eng, g),
        ITEM(ang, h), ITEM(iang, d), ITEM(uang, d),
        ITEM(er, er)
    };
#undef ITEM
    return convert(pinyin_map, initial_map, final_map);
}
}
96 changes: 48 additions & 48 deletions PinyinLib/Pinyin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,33 @@ namespace pinyin {
return String(it->second);

String result;
for (size_t size = ascii.size(); size; size--) {
if (auto it = initial_map.find(ascii.substr(0, size)); it != initial_map.end()) {
ascii = ascii.substr(size);
if (ascii.size() >= 2) {
StringView first_two = ascii.substr(0, 2);
if (first_two == LITERAL("zh") || first_two == LITERAL("ch") || first_two == LITERAL("sh")) {
auto it = initial_map.find(first_two);
assert(it != initial_map.end());
result = it->second;
break;

ascii = ascii.substr(2);
}
else
goto single_initial;
}
else {
single_initial:
if (initial_map.size() == 3) {
Char c = ascii[0];
if (c != 'a' && c != 'e' && c != 'i' && c != 'o' && c != 'u' && c != 'v') {
result = c;
ascii = ascii.substr(1);
}
}
else {
auto it = initial_map.find(ascii.substr(0, 1));
assert(it != initial_map.end());
result = it->second;

ascii = ascii.substr(1);
}
}

Expand Down Expand Up @@ -182,50 +204,6 @@ namespace pinyin {
return ascii[0];
}

// Produces the double-pinyin spelling of this syllable for the "xiaohe"
// scheme by handing the scheme's three lookup tables to convert().
// This variant spells out every initial, including the ones that map to
// themselves.
String Pinyin::to_double_pinyin_xiaohe() const
{
    using Table = std::unordered_map<StringView, StringView>;
#define DP(from, to) { LITERAL(#from), LITERAL(#to) }

    // Bare-vowel syllables (keys) and the two-letter code each one maps to.
    // Two-letter syllables map to themselves in this table.
    static Table whole_syllables{
        DP(e, ee), DP(o, oo), DP(a, aa),
        DP(ei, ei), DP(ai, ai), DP(ou, ou), DP(ao, ao),
        DP(en, en), DP(an, an),
        DP(eng, eg), DP(ang, ah)
    };

    // Every initial and its key; only zh/ch/sh differ from their own spelling.
    static Table initials{
        DP(b, b), DP(p, p), DP(m, m), DP(f, f),
        DP(d, d), DP(t, t), DP(n, n),
        DP(z, z), DP(c, c), DP(s, s), DP(l, l),
        DP(zh, v), DP(ch, i), DP(sh, u),
        DP(r, r),
        DP(j, j), DP(q, q), DP(x, x),
        DP(g, g), DP(k, k), DP(h, h),
        DP(y, y), DP(w, w)
    };

    // Finals (keys) and the key that stands for each of them.
    static Table finals{
        DP(i, i), DP(u, u), DP(v, v),
        DP(e, e), DP(ie, p),
        DP(o, o), DP(uo, o),
        DP(ue, t), DP(ve, t),
        DP(a, a), DP(ia, x), DP(ua, x),
        DP(ei, w), DP(ui, v),
        DP(ai, d), DP(uai, k),
        DP(ou, z), DP(iu, q),
        DP(ao, c), DP(iao, n),
        DP(in, b), DP(un, y), DP(vn, y),
        DP(en, f),
        DP(an, j), DP(ian, m), DP(uan, r), DP(van, r),
        DP(ing, k),
        DP(ong, s), DP(iong, s),
        DP(eng, g),
        DP(ang, h), DP(iang, l), DP(uang, l),
        DP(er, er)
    };

#undef DP
    return convert(whole_syllables, initials, finals);
}

void init(PinyinFlagValue flags)
{
for (Pinyin& py : pinyins)
Expand Down Expand Up @@ -292,10 +270,32 @@ namespace pinyin {
if (size = starts_with(string, pinyin.pinyin_ascii))
return size;
}

// Try each enabled double-pinyin scheme against that scheme's own spelling.
// Previously every flag except Abc compared against double_pinyin_xiaohe, so
// enabling Jiajia/Microsoft/Thunisoft/Zrm silently matched Xiaohe input.
// NOTE(review): the jiajia/microsoft/thunisoft/zrm member names below follow
// the double_pinyin_<scheme> pattern of the two members visible here —
// confirm against the Pinyin declaration.
if (flags & PinyinFlag::DoublePinyinAbc) {
    if (size = starts_with(string, pinyin.double_pinyin_abc))
        return size;
}
if (flags & PinyinFlag::DoublePinyinJiajia) {
    if (size = starts_with(string, pinyin.double_pinyin_jiajia))
        return size;
}
if (flags & PinyinFlag::DoublePinyinMicrosoft) {
    if (size = starts_with(string, pinyin.double_pinyin_microsoft))
        return size;
}
if (flags & PinyinFlag::DoublePinyinThunisoft) {
    if (size = starts_with(string, pinyin.double_pinyin_thunisoft))
        return size;
}
if (flags & PinyinFlag::DoublePinyinXiaohe) {
    if (size = starts_with(string, pinyin.double_pinyin_xiaohe))
        return size;
}
if (flags & PinyinFlag::DoublePinyinZrm) {
    if (size = starts_with(string, pinyin.double_pinyin_zrm))
        return size;
}

if (flags & PinyinFlag::InitialLetter) {
if (string.size() && string[0] == pinyin.initial_letter)
return 1;
Expand Down
Loading

0 comments on commit 6926a40

Please sign in to comment.