Skip to content

Commit

Permalink
fix(path): convert to native encoding on Windows (rime#806)
Browse files Browse the repository at this point in the history
refactor: convert path to native encoding on Windows

feat(rime_api): provide secure version of path getter functions `RimeApi::get_*_dir_s`.

Follow @fxliang 's PR, use `u8path` on Windows to convert UTF-8 string
to Windows native path.

Closes rime#804
Fixes rime/weasel#576
Fixes rime/weasel#1080

BREAKING CHANGE: Most `string` filenames in APIs are changed to `path`;
`installation.yaml` should be UTF-8 encoded.

Previouly on Windows, the file can be written in local encoding to
enable paths with non-ASCII characters. It should be updated to UTF-8
after this change.

Details of the code refactor

Wrap `std::filesystem::path` in a thin wrapper class `rime::path` which calls `std::filesystem::u8path` in the constructor on Windows.

Operator `/=` and `/` are also overloaded to convert the right operand from UTF-8 string to native path.

Follow these rules to apply correct conversion between `string` and `rime::path`:

- construct `rime::path` with UTF-8 encoded string;
- get native string by `path::u8string`;
- to extract UTF-8 string from `path`, for example to find schema ID from file name, call `path::u8string`;
- avoid implicit conversion from string, which results in `std::filesystem::path` without performing UTF-8 to native conversion;
- explicitly construct `rime::path` from `std::filesystem::path` before append operation, to ensure the overloaded operator with string conversion is used.
  • Loading branch information
lotem authored and graphemecluster committed Feb 6, 2024
1 parent f7be424 commit 617c863
Show file tree
Hide file tree
Showing 74 changed files with 649 additions and 565 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ RUN apt update && apt install -y \
COPY / /librime
WORKDIR /librime/plugins
RUN git clone https://github.com/rime/librime-charcode charcode && \
git clone https://github.com/hchunhui/librime-lua lua && \
# fix it
# git clone https://github.com/hchunhui/librime-lua lua && \
git clone https://github.com/rime/librime-predict predict && \
git clone https://github.com/lotem/librime-octagram octagram

Expand Down
12 changes: 6 additions & 6 deletions plugins/plugins_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ namespace rime {

class PluginManager {
public:
void LoadPlugins(fs::path plugins_dir);
void LoadPlugins(path plugins_dir);

static string plugin_name_of(fs::path plugin_file);
static string plugin_name_of(path plugin_file);

static PluginManager& instance();

Expand All @@ -32,14 +32,14 @@ class PluginManager {
map<string, boost::dll::shared_library> plugin_libs_;
};

void PluginManager::LoadPlugins(fs::path plugins_dir) {
void PluginManager::LoadPlugins(path plugins_dir) {
ModuleManager& mm = ModuleManager::instance();
if (!fs::is_directory(plugins_dir)) {
return;
}
LOG(INFO) << "loading plugins from " << plugins_dir;
for (fs::directory_iterator iter(plugins_dir), end; iter != end; ++iter) {
fs::path plugin_file = iter->path();
path plugin_file = iter->path();
if (plugin_file.extension() == boost::dll::shared_library::suffix()) {
fs::file_status plugin_file_status = fs::status(plugin_file);
if (fs::is_regular_file(plugin_file_status)) {
Expand Down Expand Up @@ -69,7 +69,7 @@ void PluginManager::LoadPlugins(fs::path plugins_dir) {
}
}

string PluginManager::plugin_name_of(fs::path plugin_file) {
string PluginManager::plugin_name_of(path plugin_file) {
string name = plugin_file.stem().string();
// remove prefix "(lib)rime-"
if (boost::starts_with(name, "librime-")) {
Expand All @@ -94,7 +94,7 @@ PluginManager& PluginManager::instance() {
} // namespace rime

static void rime_plugins_initialize() {
rime::PluginManager::instance().LoadPlugins(RIME_PLUGINS_DIR);
rime::PluginManager::instance().LoadPlugins(rime::path(RIME_PLUGINS_DIR));
}

static void rime_plugins_finalize() {}
Expand Down
4 changes: 2 additions & 2 deletions src/rime/algo/algebra.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ void Script::Merge(const string& s,
}
}

void Script::Dump(const string& file_name) const {
std::ofstream out(file_name.c_str());
void Script::Dump(const path& file_path) const {
std::ofstream out(file_path.c_str());
for (const value_type& v : *this) {
bool first = true;
for (const Spelling& s : v.second) {
Expand Down
2 changes: 1 addition & 1 deletion src/rime/algo/algebra.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Script : public map<string, vector<Spelling>> {
void Merge(const string& s,
const SpellingProperties& sp,
const vector<Spelling>& v);
void Dump(const string& file_name) const;
void Dump(const path& file_path) const;
};

class Projection {
Expand Down
4 changes: 2 additions & 2 deletions src/rime/algo/utilities.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ int CompareVersionString(const string& x, const string& y) {
ChecksumComputer::ChecksumComputer(uint32_t initial_remainder)
: crc_(initial_remainder) {}

void ChecksumComputer::ProcessFile(const string& file_name) {
std::ifstream fin(file_name.c_str());
void ChecksumComputer::ProcessFile(const path& file_path) {
std::ifstream fin(file_path.c_str());
std::stringstream buffer;
buffer << fin.rdbuf();
const auto& file_content(buffer.str());
Expand Down
6 changes: 3 additions & 3 deletions src/rime/algo/utilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ int CompareVersionString(const string& x, const string& y);
class ChecksumComputer {
public:
explicit ChecksumComputer(uint32_t initial_remainder = 0);
void ProcessFile(const string& file_name);
void ProcessFile(const path& file_path);
uint32_t Checksum();

private:
boost::crc_32_type crc_;
};

inline uint32_t Checksum(const string& file_name) {
inline uint32_t Checksum(const path& file_path) {
ChecksumComputer c;
c.ProcessFile(file_name);
c.ProcessFile(file_path);
return c.Checksum();
}

Expand Down
58 changes: 58 additions & 0 deletions src/rime/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <rime/build_config.h>

#include <filesystem>
#include <functional>
#include <list>
#include <map>
Expand Down Expand Up @@ -80,6 +81,63 @@ inline an<T> New(Args&&... args) {
using boost::signals2::connection;
using boost::signals2::signal;

class path : public std::filesystem::path {
using fs_path = std::filesystem::path;

public:
path() : fs_path() {}
path(const fs_path& p) : fs_path(p) {}
path(fs_path&& p) : fs_path(std::move(p)) {}
#ifdef _WIN32
// convert utf-8 string to native encoding path.
explicit path(const std::string& utf8_path)
: fs_path(std::filesystem::u8path(utf8_path)) {}
explicit path(const char* utf8_path)
: fs_path(std::filesystem::u8path(utf8_path)) {}
#else
// disable implicit conversion from string to path for development purpose.
explicit path(const std::string& utf8_path) : fs_path(utf8_path) {}
explicit path(const char* utf8_path) : fs_path(utf8_path) {}
#endif

path& operator/=(const path& p) {
return *this = fs_path::operator/=(p);
}
path& operator/=(const fs_path& p) {
return *this = fs_path::operator/=(p);
}
// convert UTF-8 encoded string to native encoding, then append.
path& operator/=(const std::string& p) {
return *this /= path(p);
}
path& operator/=(const char* p) {
return *this /= path(p);
}

friend path operator/(const path& lhs, const path& rhs) {
return path(lhs) /= rhs;
}
friend path operator/(const path& lhs, const fs_path& rhs) {
return path(lhs) /= rhs;
}
friend path operator/(const fs_path& lhs, const path& rhs) {
return path(lhs) /= rhs;
}
// convert UTF-8 encoded string to native encoding, then append.
friend path operator/(const path& lhs, const std::string& rhs) {
return path(lhs) /= path(rhs);
}
friend path operator/(const path& lhs, const char* rhs) {
return path(lhs) /= path(rhs);
}
friend path operator/(const fs_path& lhs, const std::string& rhs) {
return path(lhs) /= path(rhs);
}
friend path operator/(const fs_path& lhs, const char* rhs) {
return path(lhs) /= path(rhs);
}
};

} // namespace rime

#endif // RIME_COMMON_H_
6 changes: 3 additions & 3 deletions src/rime/config/build_info_plugin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ bool BuildInfoPlugin::ReviewLinkOutput(ConfigCompiler* compiler,
timestamps[resource->resource_id] = 0;
return;
}
auto file_name = resource->data->file_name();
if (file_name.empty()) {
const auto& file_path = resource->data->file_path();
if (file_path.empty()) {
LOG(WARNING) << "resource '" << resource->resource_id
<< "' is not persisted.";
timestamps[resource->resource_id] = 0;
return;
}
// TODO: store as 64-bit number to avoid the year 2038 problem
timestamps[resource->resource_id] =
(int)filesystem::to_time_t(std::filesystem::last_write_time(file_name));
(int)filesystem::to_time_t(std::filesystem::last_write_time(file_path));
});
#endif
return true;
Expand Down
2 changes: 1 addition & 1 deletion src/rime/config/config_compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ an<ConfigResource> ConfigCompiler::Compile(const string& file_name) {
graph_->resources[resource_id] = resource;
Push(resource);
resource->loaded = resource->data->LoadFromFile(
resource_resolver_->ResolvePath(resource_id).string(), this);
resource_resolver_->ResolvePath(resource_id), this);
Pop();
if (plugin_)
plugin_->ReviewCompileOutput(this, resource);
Expand Down
11 changes: 5 additions & 6 deletions src/rime/config/config_component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ bool Config::SaveToStream(std::ostream& stream) {
return data_->SaveToStream(stream);
}

bool Config::LoadFromFile(const string& file_name) {
return data_->LoadFromFile(file_name, nullptr);
bool Config::LoadFromFile(const path& file_path) {
return data_->LoadFromFile(file_path, nullptr);
}

bool Config::SaveToFile(const string& file_name) {
return data_->SaveToFile(file_name);
bool Config::SaveToFile(const path& file_path) {
return data_->SaveToFile(file_path);
}

bool Config::IsNull(const string& path) {
Expand Down Expand Up @@ -191,8 +191,7 @@ an<ConfigData> ConfigComponentBase::GetConfigData(const string& file_name) {
an<ConfigData> ConfigLoader::LoadConfig(ResourceResolver* resource_resolver,
const string& config_id) {
auto data = New<ConfigData>();
data->LoadFromFile(resource_resolver->ResolvePath(config_id).string(),
nullptr);
data->LoadFromFile(resource_resolver->ResolvePath(config_id), nullptr);
data->set_auto_save(auto_save_);
return data;
}
Expand Down
4 changes: 2 additions & 2 deletions src/rime/config/config_component.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class Config : public Class<Config, const string&>, public ConfigItemRef {

bool LoadFromStream(std::istream& stream);
bool SaveToStream(std::ostream& stream);
RIME_API bool LoadFromFile(const string& file_name);
RIME_API bool SaveToFile(const string& file_name);
RIME_API bool LoadFromFile(const path& file_path);
RIME_API bool SaveToFile(const path& file_path);

// access a tree node of a particular type with "path/to/node"
RIME_API bool IsNull(const string& path);
Expand Down
55 changes: 27 additions & 28 deletions src/rime/config/config_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ an<ConfigItem> ConvertFromYaml(const YAML::Node& yaml_node,
void EmitYaml(an<ConfigItem> node, YAML::Emitter* emitter, int depth);

ConfigData::~ConfigData() {
if (auto_save_ && modified_ && !file_name_.empty())
SaveToFile(file_name_);
if (auto_save_ && modified_ && !file_path_.empty())
SaveToFile(file_path_);
}

bool ConfigData::LoadFromStream(std::istream& stream) {
Expand Down Expand Up @@ -56,19 +56,18 @@ bool ConfigData::SaveToStream(std::ostream& stream) {
return true;
}

bool ConfigData::LoadFromFile(const string& file_name,
ConfigCompiler* compiler) {
bool ConfigData::LoadFromFile(const path& file_path, ConfigCompiler* compiler) {
// update status
file_name_ = file_name;
file_path_ = file_path;
modified_ = false;
root.reset();
if (!std::filesystem::exists(file_name)) {
LOG(WARNING) << "nonexistent config file '" << file_name << "'.";
if (!std::filesystem::exists(file_path)) {
LOG(WARNING) << "nonexistent config file '" << file_path << "'.";
return false;
}
LOG(INFO) << "loading config file '" << file_name << "'.";
LOG(INFO) << "loading config file '" << file_path << "'.";
try {
YAML::Node doc = YAML::LoadFile(file_name);
YAML::Node doc = YAML::LoadFile(file_path.string());
root = ConvertFromYaml(doc, compiler);
} catch (YAML::Exception& e) {
LOG(ERROR) << "Error parsing YAML: " << e.what();
Expand All @@ -77,17 +76,17 @@ bool ConfigData::LoadFromFile(const string& file_name,
return true;
}

bool ConfigData::SaveToFile(const string& file_name) {
bool ConfigData::SaveToFile(const path& file_path) {
// update status
file_name_ = file_name;
file_path_ = file_path;
modified_ = false;
if (file_name.empty()) {
if (file_path.empty()) {
// not really saving
return false;
}
LOG(INFO) << "saving config file '" << file_name << "'.";
LOG(INFO) << "saving config file '" << file_path << "'.";
// dump tree
std::ofstream out(file_name.c_str());
std::ofstream out(file_path.c_str());
return SaveToStream(out);
}

Expand Down Expand Up @@ -175,29 +174,29 @@ an<ConfigItemRef> TypeCheckedCopyOnWrite(an<ConfigItemRef> parent,
}

an<ConfigItemRef> TraverseCopyOnWrite(an<ConfigItemRef> head,
const string& path) {
DLOG(INFO) << "TraverseCopyOnWrite(" << path << ")";
if (path.empty() || path == "/") {
const string& node_path) {
DLOG(INFO) << "TraverseCopyOnWrite(" << node_path << ")";
if (node_path.empty() || node_path == "/") {
return head;
}
vector<string> keys = ConfigData::SplitPath(path);
vector<string> keys = ConfigData::SplitPath(node_path);
size_t n = keys.size();
for (size_t i = 0; i < n; ++i) {
const auto& key = keys[i];
if (auto child = TypeCheckedCopyOnWrite(head, key)) {
head = child;
} else {
LOG(ERROR) << "while writing to " << path;
LOG(ERROR) << "while writing to " << node_path;
return nullptr;
}
}
return head;
}

bool ConfigData::TraverseWrite(const string& path, an<ConfigItem> item) {
LOG(INFO) << "write: " << path;
bool ConfigData::TraverseWrite(const string& node_path, an<ConfigItem> item) {
LOG(INFO) << "write: " << node_path;
auto root = New<ConfigDataRootRef>(this);
if (auto target = TraverseCopyOnWrite(root, path)) {
if (auto target = TraverseCopyOnWrite(root, node_path)) {
*target = item;
set_modified();
return true;
Expand All @@ -206,10 +205,10 @@ bool ConfigData::TraverseWrite(const string& path, an<ConfigItem> item) {
}
}

vector<string> ConfigData::SplitPath(const string& path) {
vector<string> ConfigData::SplitPath(const string& node_path) {
vector<string> keys;
auto is_separator = boost::is_any_of("/");
auto trimmed_path = boost::trim_left_copy_if(path, is_separator);
auto trimmed_path = boost::trim_left_copy_if(node_path, is_separator);
boost::split(keys, trimmed_path, is_separator);
return keys;
}
Expand All @@ -218,12 +217,12 @@ string ConfigData::JoinPath(const vector<string>& keys) {
return boost::join(keys, "/");
}

an<ConfigItem> ConfigData::Traverse(const string& path) {
DLOG(INFO) << "traverse: " << path;
if (path.empty() || path == "/") {
an<ConfigItem> ConfigData::Traverse(const string& node_path) {
DLOG(INFO) << "traverse: " << node_path;
if (node_path.empty() || node_path == "/") {
return root;
}
vector<string> keys = SplitPath(path);
vector<string> keys = SplitPath(node_path);
// find the YAML::Node, and wrap it!
an<ConfigItem> p = root;
for (auto it = keys.begin(), end = keys.end(); it != end; ++it) {
Expand Down
Loading

0 comments on commit 617c863

Please sign in to comment.