From 852023ce9a26db4b03ea820f4ce14d48bfea363f Mon Sep 17 00:00:00 2001
From: Qijia Liu
Date: Sat, 12 Aug 2023 11:42:12 -0400
Subject: [PATCH] follow opencc conversion chain (#688)

* follow opencc conversion chain

* when a dict doesn't contain a word, pass as-is

* de-duplication
---
Notes (not part of the commit message):
  Opencc::ConvertWord() now runs the word through every conversion returned
  by converter_->GetConversionChain()->GetConversions(), feeding the output
  words of one dictionary into the next, instead of matching against the
  single dict_.
  - A word that a dictionary does not match is passed through unchanged to
    the next dictionary; duplicates produced within one stage are dropped,
    keeping first-seen order.
  - Returns false when converter_ is null, when any conversion in the chain
    has a null dict, or when no dictionary matched any word.
  - On success *forms is overwritten with the final word list (the previous
    code appended to *forms).

 src/rime/gear/simplifier.cc | 49 +++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/rime/gear/simplifier.cc b/src/rime/gear/simplifier.cc
index 2d38a5f63e..7463af3b28 100644
--- a/src/rime/gear/simplifier.cc
+++ b/src/rime/gear/simplifier.cc
@@ -58,19 +58,48 @@ class Opencc {
   }
 
   bool ConvertWord(const string& text, vector<string>* forms) {
-    if (dict_ == nullptr)
-      return false;
-    opencc::Optional<const opencc::DictEntry*> item = dict_->Match(text);
-    if (item.IsNull()) {
-      // Match not found
+    if (converter_ == nullptr) {
       return false;
-    } else {
-      const opencc::DictEntry* entry = item.Get();
-      for (auto&& value : entry->Values()) {
-        forms->push_back(std::move(value));
+    }
+    const list<opencc::ConversionPtr> conversions =
+        converter_->GetConversionChain()->GetConversions();
+    vector<string> original_words{text};
+    bool matched = false;
+    for (auto conversion : conversions) {
+      opencc::DictPtr dict = conversion->GetDict();
+      if (dict == nullptr) {
+        return false;
       }
-      return forms->size() > 0;
+      set<string> word_set;
+      vector<string> converted_words;
+      for (const auto& original_word : original_words) {
+        opencc::Optional<const opencc::DictEntry*> item =
+            dict->Match(original_word);
+        if (item.IsNull()) {
+          // Current dictionary doesn't convert the word. We need to keep it for
+          // other dicts in the chain. e.g. s2t.json expands 里 to 里 and 裏,
+          // then t2tw.json passes 里 as-is and converts 裏 to 裡.
+          if (word_set.insert(original_word).second) {
+            converted_words.push_back(original_word);
+          }
+          continue;
+        }
+        matched = true;
+        const opencc::DictEntry* entry = item.Get();
+        for (const auto& converted_word : entry->Values()) {
+          if (word_set.insert(converted_word).second) {
+            converted_words.push_back(converted_word);
+          }
+        }
+      }
+      original_words.swap(converted_words);
+    }
+    if (!matched) {
+      // No dictionary contains the word
+      return false;
     }
+    *forms = std::move(original_words);
+    return forms->size() > 0;
   }
 
   bool RandomConvertText(const string& text, string* simplified) {