Rename char32_t w to codepoint

This rename improves readability. Also updates some comments to match the [C++ Style Guide](https://google.github.io/styleguide/cppguide.html#Punctuation,_Spelling_and_Grammar). This patch has no behavior changes. PiperOrigin-RevId: 639659016
google · Jun 3, 2024 · 7e818ac · 7e818ac
1 parent 42cbb3f
commit 7e818ac
Show file tree

Hide file tree

Showing 9 changed files with 90 additions and 85 deletions.
diff --git a/src/base/util.cc b/src/base/util.cc
@@ -791,27 +791,29 @@ bool Util::IsEnglishTransliteration(absl::string_view value) {
 // script type
 // TODO(yukawa, team): Make a mechanism to keep this classifier up-to-date
 //   based on the original data from Unicode.org.
-Util::ScriptType Util::GetScriptType(char32_t w) {
-  if (INRANGE(w, 0x0030, 0x0039) ||  // ascii number
-      INRANGE(w, 0xFF10, 0xFF19)) {  // full width number
+Util::ScriptType Util::GetScriptType(char32_t codepoint) {
+  if (INRANGE(codepoint, 0x0030, 0x0039) ||  // ascii number
+      INRANGE(codepoint, 0xFF10, 0xFF19)) {  // full width number
     return NUMBER;
-  } else if (INRANGE(w, 0x0041, 0x005A) ||  // ascii upper
-             INRANGE(w, 0x0061, 0x007A) ||  // ascii lower
-             INRANGE(w, 0xFF21, 0xFF3A) ||  // fullwidth ascii upper
-             INRANGE(w, 0xFF41, 0xFF5A)) {  // fullwidth ascii lower
+  } else if (INRANGE(codepoint, 0x0041, 0x005A) ||  // ascii upper
+             INRANGE(codepoint, 0x0061, 0x007A) ||  // ascii lower
+             INRANGE(codepoint, 0xFF21, 0xFF3A) ||  // fullwidth ascii upper
+             INRANGE(codepoint, 0xFF41, 0xFF5A)) {  // fullwidth ascii lower
     return ALPHABET;
-  } else if (w == 0x3005 ||  // IDEOGRAPHIC ITERATION MARK "々"
-             INRANGE(w, 0x3400,
+  } else if (codepoint == 0x3005 ||  // IDEOGRAPHIC ITERATION MARK "々"
+             INRANGE(codepoint, 0x3400,
                      0x4DBF) ||  // CJK Unified Ideographs Extension A
-             INRANGE(w, 0x4E00, 0x9FFF) ||  // CJK Unified Ideographs
-             INRANGE(w, 0xF900, 0xFAFF) ||  // CJK Compatibility Ideographs
-             INRANGE(w, 0x20000,
+             INRANGE(codepoint, 0x4E00, 0x9FFF) ||  // CJK Unified Ideographs
+             INRANGE(codepoint, 0xF900,
+                     0xFAFF) ||  // CJK Compatibility Ideographs
+             INRANGE(codepoint, 0x20000,
                      0x2A6DF) ||  // CJK Unified Ideographs Extension B
-             INRANGE(w, 0x2A700,
+             INRANGE(codepoint, 0x2A700,
                      0x2B73F) ||  // CJK Unified Ideographs Extension C
-             INRANGE(w, 0x2B740,
+             INRANGE(codepoint, 0x2B740,
                      0x2B81F) ||  // CJK Unified Ideographs Extension D
-             INRANGE(w, 0x2F800, 0x2FA1F)) {  // CJK Compatibility Ideographs
+             INRANGE(codepoint, 0x2F800,
+                     0x2FA1F)) {  // CJK Compatibility Ideographs
     // As of Unicode 6.0.2, each block has the following characters assigned.
     // [U+3400, U+4DB5]:   CJK Unified Ideographs Extension A
     // [U+4E00, U+9FCB]:   CJK Unified Ideographs
@@ -821,51 +823,53 @@ Util::ScriptType Util::GetScriptType(char32_t w) {
     // [U+2B740, U+2B81D]: CJK Unified Ideographs Extension D
     // [U+2F800, U+2FA1D]: CJK Compatibility Ideographs
     return KANJI;
-  } else if (INRANGE(w, 0x3041, 0x309F) ||  // hiragana
-             w == 0x1B001) {                // HIRAGANA LETTER ARCHAIC YE
+  } else if (INRANGE(codepoint, 0x3041, 0x309F) ||  // hiragana
+             codepoint == 0x1B001) {  // HIRAGANA LETTER ARCHAIC YE
     return HIRAGANA;
-  } else if (INRANGE(w, 0x30A1, 0x30FF) ||  // full width katakana
-             INRANGE(w, 0x31F0,
+  } else if (INRANGE(codepoint, 0x30A1, 0x30FF) ||  // full width katakana
+             INRANGE(codepoint, 0x31F0,
                      0x31FF) ||  // Katakana Phonetic Extensions for Ainu
-             INRANGE(w, 0xFF65, 0xFF9F) ||  // half width katakana
-             w == 0x1B000) {                // KATAKANA LETTER ARCHAIC E
+             INRANGE(codepoint, 0xFF65, 0xFF9F) ||  // half width katakana
+             codepoint == 0x1B000) {                // KATAKANA LETTER ARCHAIC E
     return KATAKANA;
-  } else if (INRANGE(w, 0x02300, 0x023F3) ||  // Miscellaneous Technical
-             INRANGE(w, 0x02700, 0x027BF) ||  // Dingbats
-             INRANGE(w, 0x1F000, 0x1F02F) ||  // Mahjong tiles
-             INRANGE(w, 0x1F030, 0x1F09F) ||  // Domino tiles
-             INRANGE(w, 0x1F0A0, 0x1F0FF) ||  // Playing cards
-             INRANGE(w, 0x1F100,
+  } else if (INRANGE(codepoint, 0x02300, 0x023F3) ||  // Miscellaneous Technical
+             INRANGE(codepoint, 0x02700, 0x027BF) ||  // Dingbats
+             INRANGE(codepoint, 0x1F000, 0x1F02F) ||  // Mahjong tiles
+             INRANGE(codepoint, 0x1F030, 0x1F09F) ||  // Domino tiles
+             INRANGE(codepoint, 0x1F0A0, 0x1F0FF) ||  // Playing cards
+             INRANGE(codepoint, 0x1F100,
                      0x1F2FF) ||  // Enclosed Alphanumeric Supplement
-             INRANGE(w, 0x1F200, 0x1F2FF) ||  // Enclosed Ideographic Supplement
-             INRANGE(w, 0x1F300,
+             INRANGE(codepoint, 0x1F200,
+                     0x1F2FF) ||  // Enclosed Ideographic Supplement
+             INRANGE(codepoint, 0x1F300,
                      0x1F5FF) ||  // Miscellaneous Symbols And Pictographs
-             INRANGE(w, 0x1F600, 0x1F64F) ||  // Emoticons
-             INRANGE(w, 0x1F680, 0x1F6FF) ||  // Transport And Map Symbols
-             INRANGE(w, 0x1F700, 0x1F77F) ||  // Alchemical Symbols
-             w == 0x26CE) {                   // Ophiuchus
+             INRANGE(codepoint, 0x1F600, 0x1F64F) ||  // Emoticons
+             INRANGE(codepoint, 0x1F680,
+                     0x1F6FF) ||  // Transport And Map Symbols
+             INRANGE(codepoint, 0x1F700, 0x1F77F) ||  // Alchemical Symbols
+             codepoint == 0x26CE) {                   // Ophiuchus
     return EMOJI;
   }
 
   return UNKNOWN_SCRIPT;
 }
 
-Util::FormType Util::GetFormType(char32_t w) {
+Util::FormType Util::GetFormType(char32_t codepoint) {
   // 'Unicode Standard Annex #11: EAST ASIAN WIDTH'
   // http://www.unicode.org/reports/tr11/
 
   // Characters marked as 'Na' in
   // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
-  if (INRANGE(w, 0x0020, 0x007F) ||  // ascii
-      INRANGE(w, 0x27E6, 0x27ED) ||  // narrow mathematical symbols
-      INRANGE(w, 0x2985, 0x2986)) {  // narrow white parentheses
+  if (INRANGE(codepoint, 0x0020, 0x007F) ||  // ascii
+      INRANGE(codepoint, 0x27E6, 0x27ED) ||  // narrow mathematical symbols
+      INRANGE(codepoint, 0x2985, 0x2986)) {  // narrow white parentheses
     return HALF_WIDTH;
   }
 
   // Other characters marked as 'Na' in
   // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
-  if (INRANGE(w, 0x00A2, 0x00AF)) {
-    switch (w) {
+  if (INRANGE(codepoint, 0x00A2, 0x00AF)) {
+    switch (codepoint) {
       case 0x00A2:  // CENT SIGN
       case 0x00A3:  // POUND SIGN
       case 0x00A5:  // YEN SIGN
@@ -878,13 +882,13 @@ Util::FormType Util::GetFormType(char32_t w) {
 
   // Characters marked as 'H' in
   // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
-  if (w == 0x20A9 ||                 // WON SIGN
-      INRANGE(w, 0xFF61, 0xFF9F) ||  // half-width katakana
-      INRANGE(w, 0xFFA0, 0xFFBE) ||  // half-width hangul
-      INRANGE(w, 0xFFC2, 0xFFCF) ||  // half-width hangul
-      INRANGE(w, 0xFFD2, 0xFFD7) ||  // half-width hangul
-      INRANGE(w, 0xFFDA, 0xFFDC) ||  // half-width hangul
-      INRANGE(w, 0xFFE8, 0xFFEE)) {  // half-width symbols
+  if (codepoint == 0x20A9 ||                 // WON SIGN
+      INRANGE(codepoint, 0xFF61, 0xFF9F) ||  // half-width katakana
+      INRANGE(codepoint, 0xFFA0, 0xFFBE) ||  // half-width hangul
+      INRANGE(codepoint, 0xFFC2, 0xFFCF) ||  // half-width hangul
+      INRANGE(codepoint, 0xFFD2, 0xFFD7) ||  // half-width hangul
+      INRANGE(codepoint, 0xFFDA, 0xFFDC) ||  // half-width hangul
+      INRANGE(codepoint, 0xFFE8, 0xFFEE)) {  // half-width symbols
     return HALF_WIDTH;
   }
 
@@ -970,9 +974,10 @@ Util::ScriptType Util::GetScriptTypeWithoutSymbols(absl::string_view str) {
 // return true if all script_type in str is "type"
 bool Util::IsScriptType(absl::string_view str, Util::ScriptType type) {
   for (ConstChar32Iterator iter(str); !iter.Done(); iter.Next()) {
-    const char32_t w = iter.Get();
+    const char32_t codepoint = iter.Get();
     // Exception: 30FC (PROLONGEDSOUND MARK is categorized as HIRAGANA as well)
-    if (type != GetScriptType(w) && (w != 0x30FC || type != HIRAGANA)) {
+    if (type != GetScriptType(codepoint) &&
+        (codepoint != 0x30FC || type != HIRAGANA)) {
       return false;
     }
   }

diff --git a/src/base/util.h b/src/base/util.h
@@ -188,8 +188,8 @@ class Util {
     SCRIPT_TYPE_SIZE,
   };
 
-  // return script type of w
-  static ScriptType GetScriptType(char32_t w);
+  // Returns the script type of `codepoint`.
+  static ScriptType GetScriptType(char32_t codepoint);
 
   // Returns the script type of the first character in `str`.
   // This function finds the first UTF-8 chars and returns its script type.
@@ -198,7 +198,7 @@ class Util {
   static ScriptType GetFirstScriptType(absl::string_view str,
                                        size_t *mblen = nullptr);
 
-  // return script type of string. all chars in str must be
+  // Returns the script type of a string. All chars in str must be
   // KATAKANA/HIRAGANA/KANJI/NUMBER or ALPHABET.
   // If str has mixed scripts, this function returns UNKNOWN_SCRIPT
   static ScriptType GetScriptType(absl::string_view str);
@@ -207,10 +207,10 @@ class Util {
   // in the |str|.
   static ScriptType GetScriptTypeWithoutSymbols(absl::string_view str);
 
-  // return true if all script_type in str is "type"
+  // Returns true if all script_type in str is "type"
   static bool IsScriptType(absl::string_view str, ScriptType type);
 
-  // return true if the string contains script_type char
+  // Returns true if the string contains script_type char
   static bool ContainsScriptType(absl::string_view str, ScriptType type);
 
   // See 'Unicode Standard Annex #11: EAST ASIAN WIDTH'
@@ -223,12 +223,12 @@ class Util {
     FORM_TYPE_SIZE,
   };
 
-  // return Form type of single character.
+  // Returns Form type of single character.
   // This function never returns UNKNOWN_FORM.
-  static FormType GetFormType(char32_t w);
+  static FormType GetFormType(char32_t codepoint);
 
-  // return FormType of string.
-  // return UNKNOWN_FORM if |str| contains both HALF_WIDTH and FULL_WIDTH.
+  // Returns FormType of string.
+  // Returns UNKNOWN_FORM if |str| contains both HALF_WIDTH and FULL_WIDTH.
   static FormType GetFormType(absl::string_view str);
 
   // Returns true if all characters of `str` are ASCII (U+00 - U+7F).

diff --git a/src/converter/converter.cc b/src/converter/converter.cc
@@ -177,11 +177,12 @@ bool ExtractLastTokenWithScriptType(const absl::string_view text,
   std::vector<char32_t> reverse_last_token;
   Util::ScriptType last_script_type_found = Util::GetScriptType(iter.Get());
   for (; !iter.Done(); iter.Next()) {
-    const char32_t w = iter.Get();
-    if ((w == ' ') || (Util::GetScriptType(w) != last_script_type_found)) {
+    const char32_t codepoint = iter.Get();
+    if ((codepoint == ' ') ||
+        (Util::GetScriptType(codepoint) != last_script_type_found)) {
       break;
     }
-    reverse_last_token.push_back(w);
+    reverse_last_token.push_back(codepoint);
   }
 
   *last_script_type = last_script_type_found;

diff --git a/src/prediction/dictionary_prediction_aggregator_test.cc b/src/prediction/dictionary_prediction_aggregator_test.cc
@@ -291,13 +291,13 @@ void SetUpInputForSuggestionWithHistory(absl::string_view key,
 void GenerateKeyEvents(absl::string_view text,
                        std::vector<commands::KeyEvent> *keys) {
   keys->clear();
-  for (const char32_t w : Util::Utf8ToUtf32(text)) {
+  for (const char32_t codepoint : Util::Utf8ToUtf32(text)) {
     commands::KeyEvent key;
-    if (w <= 0x7F) {  // IsAscii, w is unsigned.
-      key.set_key_code(w);
+    if (codepoint <= 0x7F) {  // IsAscii, w is unsigned.
+      key.set_key_code(codepoint);
     } else {
       key.set_key_code('?');
-      *key.mutable_key_string() = Util::CodepointToUtf8(w);
+      *key.mutable_key_string() = Util::CodepointToUtf8(codepoint);
     }
     keys->push_back(key);
   }

diff --git a/src/prediction/user_history_predictor.cc b/src/prediction/user_history_predictor.cc
@@ -722,9 +722,9 @@ bool UserHistoryPredictor::MaybeRomanMisspelledKey(
   int num_hiragana = 0;
   int num_unknown = 0;
   for (ConstChar32Iterator iter(key); !iter.Done(); iter.Next()) {
-    const char32_t w = iter.Get();
-    const Util::ScriptType type = Util::GetScriptType(w);
-    if (type == Util::HIRAGANA || w == 0x30FC) {  // "ー".
+    const char32_t codepoint = iter.Get();
+    const Util::ScriptType type = Util::GetScriptType(codepoint);
+    if (type == Util::HIRAGANA || codepoint == 0x30FC) {  // "ー".
       ++num_hiragana;
       continue;
     }

diff --git a/src/prediction/user_history_predictor_test.cc b/src/prediction/user_history_predictor_test.cc
@@ -2821,9 +2821,9 @@ void InitSegmentsFromInputSequence(const absl::string_view text,
   DCHECK(segments);
   for (const UnicodeChar ch : Utf8AsUnicodeChar(text)) {
     commands::KeyEvent key;
-    const char32_t w = ch.char32();
-    if (w <= 0x7F) {  // IsAscii, w is unsigned.
-      key.set_key_code(w);
+    const char32_t codepoint = ch.char32();
+    if (codepoint <= 0x7F) {  // IsAscii, w is unsigned.
+      key.set_key_code(codepoint);
     } else {
       key.set_key_code('?');
       key.set_key_string(ch.utf8());

diff --git a/src/rewriter/collocation_rewriter.cc b/src/rewriter/collocation_rewriter.cc
@@ -273,9 +273,8 @@ bool IsNaturalContent(const Segment::Candidate &cand,
 
   // special cases
   if (top_content_len == 1) {
-    const char32_t wchar = Util::Utf8ToCodepoint(top_content);
-
-    switch (wchar) {
+    const char32_t codepoint = Util::Utf8ToCodepoint(top_content);
+    switch (codepoint) {
       case 0x304a:  // "お"
       case 0x5fa1:  // "御"
       case 0x3054:  // "ご"

diff --git a/src/rewriter/collocation_util.cc b/src/rewriter/collocation_util.cc
@@ -77,14 +77,14 @@ void CollocationUtil::RemoveExtraCharacters(const absl::string_view input,
                                             bool remove_number,
                                             std::string *output) {
   for (ConstChar32Iterator iter(input); !iter.Done(); iter.Next()) {
-    const char32_t w = iter.Get();
-    if (((Util::GetScriptType(w) != Util::UNKNOWN_SCRIPT) &&
-         (!remove_number || !IsNumber(w))) ||
-        w == 0x3005 ||                 // "々"
-        w == 0x0025 || w == 0xFF05 ||  // "%", "％"
-        w == 0x3006 ||                 // "〆"
-        w == 0x301C || w == 0xFF5E) {  // "〜", "～"
-      Util::CodepointToUtf8Append(w, output);
+    const char32_t codepoint = iter.Get();
+    if (((Util::GetScriptType(codepoint) != Util::UNKNOWN_SCRIPT) &&
+         (!remove_number || !IsNumber(codepoint))) ||
+        codepoint == 0x3005 ||                         // "々"
+        codepoint == 0x0025 || codepoint == 0xFF05 ||  // "%", "％"
+        codepoint == 0x3006 ||                         // "〆"
+        codepoint == 0x301C || codepoint == 0xFF5E) {  // "〜", "～"
+      Util::CodepointToUtf8Append(codepoint, output);
     }
   }
 }

diff --git a/src/rewriter/usage_rewriter.cc b/src/rewriter/usage_rewriter.cc
@@ -110,19 +110,19 @@ std::string UsageRewriter::GetKanjiPrefixAndOneHiragana(
   bool has_kanji = false;
   bool has_hiragana = false;
   for (ConstChar32Iterator iter(word); !iter.Done(); iter.Next()) {
-    const char32_t w = iter.Get();
-    const Util::ScriptType s = Util::GetScriptType(w);
+    const char32_t codepoint = iter.Get();
+    const Util::ScriptType s = Util::GetScriptType(codepoint);
     if (pos == 0 && s != Util::KANJI) {
       return "";
     } else if (pos >= 0 && pos <= 1 && s == Util::KANJI) {
       // length of kanji <= 2.
       has_kanji = true;
       ++pos;
-      Util::CodepointToUtf8Append(w, &result);
+      Util::CodepointToUtf8Append(codepoint, &result);
       continue;
     } else if (pos > 0 && s == Util::HIRAGANA) {
       has_hiragana = true;
-      Util::CodepointToUtf8Append(w, &result);
+      Util::CodepointToUtf8Append(codepoint, &result);
       break;
     } else {
       return "";