Skip to content

Commit

Permalink
Merge pull request godotengine#90162 from YeldhamDev/pot_future_proof
Browse files Browse the repository at this point in the history
Improve string extraction of ETR POT file
  • Loading branch information
akien-mga committed May 8, 2024
2 parents 9c7e403 + 7bdae75 commit 4509404
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 23 deletions.
170 changes: 153 additions & 17 deletions editor/editor_translation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,30 +156,166 @@ void load_extractable_translations(const String &p_locale) {
}
}

// Returns the messages stored in the embedded "source" extractable translation.
//
// Walks the _extractable_translations table until an entry with a null `data`
// pointer, inflates the entry whose `lang` is "source", and parses the result as
// a POT file. Each element of the returned vector holds three strings, in this
// order: [0] msgid, [1] msgctxt, [2] msgid_plural. Entries with an empty msgid
// are never emitted. On a decompression or parse error the ERR_FAIL_* macros
// are invoked with an error message and a fallback return value.
//
// NOTE(review): two signatures, two `while (etl->data)` headers and two return
// statements appear in this span; presumably it is a rendered diff in which the
// List<StringName>/msgids lines are the pre-change version and the
// Vector<Vector<String>>/list lines are the post-change version — confirm
// against the actual file before editing.
List<StringName> get_extractable_message_list() {
Vector<Vector<String>> get_extractable_message_list() {
ExtractableTranslationList *etl = _extractable_translations;
List<StringName> msgids;
while (etl->data) {
if (!strcmp(etl->lang, "source")) {
Vector<uint8_t> data;
data.resize(etl->uncomp_size);
int ret = Compression::decompress(data.ptrw(), etl->uncomp_size, etl->data, etl->comp_size, Compression::MODE_DEFLATE);
ERR_FAIL_COND_V_MSG(ret == -1, msgids, "Compressed file is corrupt.");
Vector<Vector<String>> list;

Ref<FileAccessMemory> fa;
fa.instantiate();
fa->open_custom(data.ptr(), data.size());

Ref<Translation> tr = TranslationLoaderPO::load_translation(fa);
while (etl->data) {
// strcmp() returns non-zero when the strings differ: skip every entry that is not "source".
if (strcmp(etl->lang, "source")) {
etl++;
continue;
}

if (tr.is_valid()) {
tr->get_message_list(&msgids);
break;
// Inflate the entry into a buffer sized from its recorded uncompressed size.
Vector<uint8_t> data;
data.resize(etl->uncomp_size);
int ret = Compression::decompress(data.ptrw(), etl->uncomp_size, etl->data, etl->comp_size, Compression::MODE_DEFLATE);
ERR_FAIL_COND_V_MSG(ret == -1, list, "Compressed file is corrupt.");

// Wrap the buffer in an in-memory file so it can be read line by line below.
Ref<FileAccessMemory> fa;
fa.instantiate();
fa->open_custom(data.ptr(), data.size());

// Taken from TranslationLoaderPO, modified to work specifically with POTs.
{
// Only used to build the error messages below.
const String path = fa->get_path();

fa->seek(0);

// Which field the quoted text on the current line is appended to.
enum Status {
STATUS_NONE,
STATUS_READING_ID,
STATUS_READING_STRING,
STATUS_READING_CONTEXT,
STATUS_READING_PLURAL,
};

Status status = STATUS_NONE;

// Accumulators for the entry currently being read; quoted text may span several lines.
String msg_id;
String msg_id_plural;
String msg_context;

// `line` is only used for error reporting.
int line = 1;
// True between a "msgctxt" line and the "msgid" line that follows it.
bool entered_context = false;
bool is_eof = false;

while (!is_eof) {
String l = fa->get_line().strip_edges();
is_eof = fa->eof_reached();

// At EOF with an empty last line there is nothing left to parse: fail if an id, context or
// plural was still being read, otherwise stop. A non-empty last line is processed normally.
if (is_eof && l.is_empty()) {
if (status == STATUS_READING_ID || status == STATUS_READING_CONTEXT || status == STATUS_READING_PLURAL) {
ERR_FAIL_V_MSG(Vector<Vector<String>>(), "Unexpected EOF while reading POT file at: " + path + ":" + itos(line));
} else {
break;
}
}

if (l.begins_with("msgctxt")) {
ERR_FAIL_COND_V_MSG(status != STATUS_READING_STRING && status != STATUS_READING_PLURAL, Vector<Vector<String>>(),
"Unexpected 'msgctxt', was expecting 'msgid_plural' or 'msgstr' before 'msgctxt' while parsing: " + path + ":" + itos(line));

// In POT files, "msgctxt" appears before "msgid". On "msgctxt", emit the entry read so far
// and set "entered_context" so that the following "msgid" does not emit it a second time.
if (!msg_id.is_empty()) {
Vector<String> msgs;
msgs.push_back(msg_id);
msgs.push_back(msg_context);
msgs.push_back(msg_id_plural);
list.push_back(msgs);
}
msg_context = "";
// 7 is the length of the "msgctxt" keyword; what remains is the quoted text.
l = l.substr(7, l.length()).strip_edges();
status = STATUS_READING_CONTEXT;
entered_context = true;
}

// "msgid_plural" must be tested before "msgid" because it shares that prefix.
if (l.begins_with("msgid_plural")) {
if (status != STATUS_READING_ID) {
ERR_FAIL_V_MSG(Vector<Vector<String>>(), "Unexpected 'msgid_plural', was expecting 'msgid' before 'msgid_plural' while parsing: " + path + ":" + itos(line));
}
// 12 is the length of the "msgid_plural" keyword.
l = l.substr(12, l.length()).strip_edges();
status = STATUS_READING_PLURAL;
} else if (l.begins_with("msgid")) {
ERR_FAIL_COND_V_MSG(status == STATUS_READING_ID, Vector<Vector<String>>(), "Unexpected 'msgid', was expecting 'msgstr' while parsing: " + path + ":" + itos(line));

// Emit the previous entry, unless the preceding "msgctxt" line already did.
if (!msg_id.is_empty() && !entered_context) {
Vector<String> msgs;
msgs.push_back(msg_id);
msgs.push_back(msg_context);
msgs.push_back(msg_id_plural);
list.push_back(msgs);
}

// 5 is the length of the "msgid" keyword.
l = l.substr(5, l.length()).strip_edges();
status = STATUS_READING_ID;
// Without a preceding "msgctxt", clear the context left over from the previous entry.
if (!entered_context) {
msg_context = "";
}
msg_id = "";
msg_id_plural = "";
entered_context = false;
}

// "msgstr[" must be tested before "msgstr" because it shares that prefix.
if (l.begins_with("msgstr[")) {
ERR_FAIL_COND_V_MSG(status != STATUS_READING_PLURAL, Vector<Vector<String>>(),
"Unexpected 'msgstr[]', was expecting 'msgid_plural' before 'msgstr[]' while parsing: " + path + ":" + itos(line));
// 9 covers "msgstr[" + one index character + "]".
// NOTE(review): assumes a single-digit plural index — confirm this always holds for the POT.
// NOTE(review): status is left at STATUS_READING_PLURAL here, so quoted text on this line is
// appended to msg_id_plural below; presumably harmless because POT msgstr values are empty — verify.
l = l.substr(9, l.length()).strip_edges();
} else if (l.begins_with("msgstr")) {
ERR_FAIL_COND_V_MSG(status != STATUS_READING_ID, Vector<Vector<String>>(),
"Unexpected 'msgstr', was expecting 'msgid' before 'msgstr' while parsing: " + path + ":" + itos(line));
// 6 is the length of the "msgstr" keyword.
l = l.substr(6, l.length()).strip_edges();
status = STATUS_READING_STRING;
}

if (l.is_empty() || l.begins_with("#")) {
line++;
continue; // Nothing to read or comment.
}

// Anything left at this point must be a quoted string belonging to a previously seen keyword.
ERR_FAIL_COND_V_MSG(!l.begins_with("\"") || status == STATUS_NONE, Vector<Vector<String>>(), "Invalid line '" + l + "' while parsing: " + path + ":" + itos(line));

// Drop the opening quote.
l = l.substr(1, l.length());
// Find final quote, ignoring escaped ones (\").
// The escape_next logic is necessary to properly parse things like \\"
// where the backslash is the one being escaped, not the quote.
int end_pos = -1;
bool escape_next = false;
for (int i = 0; i < l.length(); i++) {
if (l[i] == '\\' && !escape_next) {
escape_next = true;
continue;
}

if (l[i] == '"' && !escape_next) {
end_pos = i;
break;
}

escape_next = false;
}

ERR_FAIL_COND_V_MSG(end_pos == -1, Vector<Vector<String>>(), "Expected '\"' at end of message while parsing: " + path + ":" + itos(line));

// Keep only the text before the closing quote, then resolve its escape sequences.
l = l.substr(0, end_pos);
l = l.c_unescape();

// Text read while status is STATUS_READING_STRING (a msgstr value) matches no branch and is discarded.
if (status == STATUS_READING_ID) {
msg_id += l;
} else if (status == STATUS_READING_CONTEXT) {
msg_context += l;
} else if (status == STATUS_READING_PLURAL) {
msg_id_plural += l;
}

line++;
}
// NOTE(review): entries are only pushed to `list` when a later "msgctxt" or "msgid" line is seen,
// so the entry still held in msg_id/msg_context/msg_id_plural when the loop ends is not added
// here — confirm whether dropping the last message of the POT is intended.
}

etl++;
}

return msgids;
return list;
}
2 changes: 1 addition & 1 deletion editor/editor_translation.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,6 @@ void load_editor_translations(const String &p_locale);
void load_property_translations(const String &p_locale);
void load_doc_translations(const String &p_locale);
void load_extractable_translations(const String &p_locale);
List<StringName> get_extractable_message_list();
Vector<Vector<String>> get_extractable_message_list();

#endif // EDITOR_TRANSLATION_H
7 changes: 2 additions & 5 deletions editor/pot_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
#include "core/error/error_macros.h"
#include "editor/editor_translation.h"
#include "editor/editor_translation_parser.h"
#include "plugins/packed_scene_translation_parser_plugin.h"

POTGenerator *POTGenerator::singleton = nullptr;

Expand Down Expand Up @@ -66,8 +65,6 @@ void POTGenerator::generate_pot(const String &p_file) {
// Clear all_translation_strings of the previous round.
all_translation_strings.clear();

List<StringName> extractable_msgids = get_extractable_message_list();

// Collect all translatable strings according to files order in "POT Generation" setting.
for (int i = 0; i < files.size(); i++) {
Vector<String> msgids;
Expand All @@ -92,8 +89,8 @@ void POTGenerator::generate_pot(const String &p_file) {
}

if (GLOBAL_GET("internationalization/locale/translation_add_builtin_strings_to_pot")) {
for (const StringName &extractable_msgid : extractable_msgids) {
_add_new_msgid(extractable_msgid, "", "", "");
for (const Vector<String> &extractable_msgids : get_extractable_message_list()) {
_add_new_msgid(extractable_msgids[0], extractable_msgids[1], extractable_msgids[2], "");
}
}

Expand Down

0 comments on commit 4509404

Please sign in to comment.