From 5d0a215158c94c21fb3ed51356543e036c3a2c3f Mon Sep 17 00:00:00 2001 From: Cryolitia PukNgae Date: Sat, 28 Dec 2024 15:43:05 +0800 Subject: [PATCH] 2 --- .devcontainer/Dockerfile | 1 - .github/workflows/cross-build.yml | 11 +-- Cargo.lock | 120 ++++++++++++++++++++++++++++++ Developer.md | 7 -- crates/lang_unicodes/Cargo.toml | 4 + crates/lang_unicodes/build.rs | 54 +++----------- 6 files changed, 138 insertions(+), 59 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index e95935ec..5ede3fc3 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,7 +1,6 @@ FROM konghayao123/best-cn-dev-container:1.1.0 RUN apt install -y llvm clang pkg-config libssl-dev RUN apt install -y protobuf-compiler && protoc --version -RUN apt install -y opencc && opencc --version RUN bash /init/rust.sh ENV RUSTUP_DIST_SERVER="https://rsproxy.cn" ENV RUSTUP_UPDATE_ROOT="https://rsproxy.cn/rustup" diff --git a/.github/workflows/cross-build.yml b/.github/workflows/cross-build.yml index c434b292..0af8ffea 100644 --- a/.github/workflows/cross-build.yml +++ b/.github/workflows/cross-build.yml @@ -80,17 +80,12 @@ jobs: scoop install main/mingw scoop install main/nodejs - pip install opencc --target $HOME - echo "OPENCC_BIN=$HOME/opencc/clib/bin/opencc" | Out-File -FilePath $env:GITHUB_ENV -Append - echo "OPENCC_DATA_PATH=$HOME/opencc/clib/share/opencc/" | Out-File -FilePath $env:GITHUB_ENV -Append - $env:Path += ";$HOME/opencc/clib/bin" - opencc --version ${{ matrix.platform.setup }} - name: Setup Linux Environment if: ${{ runner.os == 'Linux' }} run: | sudo apt update - sudo apt install -y nodejs llvm clang pkg-config libssl-dev opencc + sudo apt install -y nodejs llvm clang pkg-config libssl-dev ${{ matrix.platform.setup }} - name: Set up Homebrew id: set-up-homebrew @@ -100,7 +95,7 @@ jobs: - name: Setup MacOS Environment if: ${{ runner.os == 'macOS' }} run: | - brew install llvm protobuf opencc automake libtool + brew install llvm protobuf automake libtool ${{ matrix.platform.setup }} - name: Install Protoc @@ -170,7 +165,7 @@ jobs: run: | sudo apt update sudo apt upgrade - sudo apt install -y llvm clang pkg-config libssl-dev protobuf-compiler opencc + sudo apt install -y llvm clang pkg-config libssl-dev protobuf-compiler - name: Setup WASI Environment working-directory: . diff --git a/Cargo.lock b/Cargo.lock index ea160734..901867a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -294,6 +294,26 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.6.0", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.90", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -365,6 +385,15 @@ dependencies = [ "shlex", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -401,6 +430,17 @@ dependencies = [ "inout", ] +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "3.2.25" @@ -480,6 +520,15 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "cmake" +version = "0.1.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" +dependencies = [ + "cc", +] + [[package]] name = "cn-font-proto" version = "0.1.1" @@ -1022,6 +1071,12 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "grpc" version = "0.1.0" @@ -1542,6 +1597,7 @@ name = "lang-unicodes" version = "0.1.0" dependencies = [ "lazy_static", + "opencc-rs", ] [[package]] @@ -1556,6 +1612,16 @@ version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" +[[package]] +name = "libloading" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" +dependencies = [ + "cfg-if", + "windows-targets", +] + [[package]] name = "libredox" version = "0.1.3" @@ -1566,6 +1632,15 @@ dependencies = [ "libc", ] +[[package]] +name = "link-cplusplus" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" +dependencies = [ + "cc", +] + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -1654,6 +1729,12 @@ dependencies = [ "rxml", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.4" @@ -1729,6 +1810,16 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1769,6 +1860,29 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "opencc-rs" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a45d82f519a62e7439dbccd6d1dcfc349588dca7099732ec031f2a97ee79e44a" +dependencies = [ + "libc", + "opencc-sys", + "tempfile", + "thiserror 2.0.8", +] + +[[package]] +name = "opencc-sys" +version = "0.3.4+1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e173b43a777be3d35c2c8734f23303308b2d1f5c5ab22e4b6ccd9387b3dfbe34" +dependencies = [ + "bindgen", + "cmake", + "link-cplusplus", +] + [[package]] name = "openssl" version = "0.10.68" @@ -2233,6 +2347,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" + [[package]] name = "rustc_version" version = "0.4.1" diff --git a/Developer.md b/Developer.md index 7aa1f54d..d811c63b 100644 --- a/Developer.md +++ b/Developer.md @@ -7,13 +7,6 @@ apt update -y apt install -y protobuf-compiler && protoc --version ``` -## 安装 opencc 命令 - -```sh -apt update -y -apt install -y opencc && opencc --version -``` - ## 安装 Wasm-sdk 安装并自动配置,执行文件在 .devcontainer/wasi-install.sh,需要在容器中执行 diff --git a/crates/lang_unicodes/Cargo.toml b/crates/lang_unicodes/Cargo.toml index 0c229e1f..89a8718e 100644 --- a/crates/lang_unicodes/Cargo.toml +++ b/crates/lang_unicodes/Cargo.toml @@ -14,3 +14,7 @@ lazy_static = "1.5.0" [lib] path = "src/lib.rs" + +[build-dependencies] +lazy_static = "1.5.0" +opencc-rs = "0.4.7" diff --git a/crates/lang_unicodes/build.rs b/crates/lang_unicodes/build.rs index 622ce20e..68ae9911 100644 --- a/crates/lang_unicodes/build.rs +++ b/crates/lang_unicodes/build.rs @@ -1,7 +1,7 @@ +use lazy_static::lazy_static; +use opencc_rs::{Config, OpenCC}; use std::collections::HashSet; use std::fs::{create_dir, exists, read_to_string}; -use std::io::Write; -use std::process::{Command, Stdio}; use std::{env, fs}; const CN_SYMBOL: &str = "⸺、。〈〉《》「」『』【】〔〕〖〗︐︑︒︓︔︕︖︐︑︒︓︔︕︖︗︘︙︰︱︳︴︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄"; @@ -15,28 +15,12 @@ fn encode_utf16(s: &char) -> u16 { *s.encode_utf16(&mut buf).iter().next().unwrap() } -fn opencc_convert( - s: String, - opencc_bin: &str, - opencc_data_path: &Option, -) -> String { - let mut binding = Command::new(opencc_bin); - let mut command = binding - .arg("-c") - .arg("s2t.json") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()); - if let Some(data_path) = opencc_data_path { - command = command.arg("--path").arg(data_path); - } - let mut child = command.spawn().unwrap(); - - let mut stdin = child.stdin.take().unwrap(); - stdin.write_all(s.as_bytes()).unwrap(); - drop(stdin); +lazy_static! { + static ref OPENCC: OpenCC = OpenCC::new([Config::S2T]).unwrap(); +} - let output = child.wait_with_output().unwrap(); - String::from_utf8_lossy(&output.stdout).to_string() +fn opencc_convert(s: String) -> String { + OPENCC.convert(s).unwrap() } /* @@ -75,17 +59,12 @@ fn opencc_convert( */ -fn process_chinese_chars(opencc_bin: &str, opencc_data: &Option) { +fn process_chinese_chars() { let sc: Vec = CN_SYMBOL.chars().chain(HAN_ZI_PIN_LV.chars()).clone().collect(); let tc: Vec = sc .iter() - .map(|i| { - opencc_convert(i.to_string(), opencc_bin, opencc_data) - .chars() - .next() - .unwrap() - }) + .map(|i| opencc_convert(i.to_string()).chars().next().unwrap()) .map(|i| encode_utf16(&i)) .collect(); let sc: Vec = sc.iter().map(encode_utf16).collect(); @@ -156,6 +135,7 @@ fn main() { println!("cargo::rerun-if-changed={}", CN_CHAR_RANK_FILE); println!("cargo::rerun-if-changed={}", HANGUL_SYL_FILE); println!("cargo::rerun-if-env-changed=CARGO_WITH_NO_EXTRA"); + if !exists("./data").unwrap() { create_dir("./data").unwrap(); } @@ -171,18 +151,6 @@ fn main() { } } - let opencc_bin: String = - env::var("OPENCC_BIN").unwrap_or("opencc".to_string()); - let opencc_data: Option = env::var("OPENCC_DATA_PATH").ok(); - // 检查 opencc 是否存在 - let status = Command::new(&opencc_bin).arg("--version").status(); - if status.is_err() { - panic!( - "opencc: {} is not installed or not in PATH \n {:?}", - opencc_bin, status - ); - } - - process_chinese_chars(&opencc_bin, &opencc_data); + process_chinese_chars(); process_korean_syllables(); }