From 9d2f3f0e2477252de56ccc6d0b8077da82eade0e Mon Sep 17 00:00:00 2001 From: Chaoses-Ib Date: Sat, 16 Dec 2023 04:04:28 +0800 Subject: [PATCH] feat(c): add C binding --- Cargo.lock | 127 +++++++++++++++++- Cargo.toml | 18 ++- README.md | 21 ++- bindings/c/Cargo.toml | 19 +++ bindings/c/README.md | 6 + bindings/c/examples/cmake/.gitignore | 3 + bindings/c/examples/cmake/CMakeLists.txt | 18 +++ bindings/c/examples/cmake/CMakePresets.json | 61 +++++++++ bindings/c/examples/cmake/main.c | 18 +++ .../c/include/ib_pinyin/diplomat_runtime.h | 70 ++++++++++ bindings/c/include/ib_pinyin/ib_pinyin.h | 31 +++++ bindings/c/include/ib_pinyin/notation.h | 59 ++++++++ bindings/c/src/lib.rs | 34 +++++ src/matcher.rs | 1 + src/minimal.rs | 2 +- 15 files changed, 484 insertions(+), 4 deletions(-) create mode 100644 bindings/c/Cargo.toml create mode 100644 bindings/c/README.md create mode 100644 bindings/c/examples/cmake/.gitignore create mode 100644 bindings/c/examples/cmake/CMakeLists.txt create mode 100644 bindings/c/examples/cmake/CMakePresets.json create mode 100644 bindings/c/examples/cmake/main.c create mode 100644 bindings/c/include/ib_pinyin/diplomat_runtime.h create mode 100644 bindings/c/include/ib_pinyin/ib_pinyin.h create mode 100644 bindings/c/include/ib_pinyin/notation.h create mode 100644 bindings/c/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index b432006..7e8ac5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,21 +26,87 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +[[package]] +name = "diplomat" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31672b3ebc3c7866c3c98726f7a9a5ac8f13962e77d3c8225f6be49a7b8c5f2" +dependencies = [ + "diplomat_core", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "diplomat-runtime" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7b0f23d549a46540e26e5490cd44c64ced0d762959f1ffdec6ab0399634cf3c" + +[[package]] +name = "diplomat_core" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfaa5e13e8b8735d2338f2836c06cd8643902ab87dda1dd07dbb351998ddc127" +dependencies = [ + "lazy_static", + "proc-macro2", + "quote", + "serde", + "smallvec", + "strck_ident", + "syn", +] + [[package]] name = "ib-pinyin" -version = "0.1.0" +version = "0.2.0" dependencies = [ "arraystring", "bitflags", "regex", ] +[[package]] +name = "ib-pinyin-c" +version = "0.2.0" +dependencies = [ + "diplomat", + "diplomat-runtime", + "ib-pinyin", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "memchr" version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + [[package]] name = "regex" version = "1.10.2" @@ -70,8 +136,67 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "serde" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.193" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smallvec" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" + +[[package]] +name = "strck" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be91090ded9d8f979d9fe921777342d37e769e0b6b7296843a7a38247240e917" + +[[package]] +name = "strck_ident" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1c3802b169b3858a44667f221c9a0b3136e6019936ea926fc97fbad8af77202" +dependencies = [ + "strck", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "typenum" version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" diff --git a/Cargo.toml b/Cargo.toml index b2f28dd..be11c31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,23 @@ +[workspace] +members = ["bindings/c"] + +[workspace.package] +version = "0.2.0" +authors = ["Chaoses-Ib"] +homepage = "https://github.com/Chaoses-Ib/IbPinyinLib" +repository = "https://github.com/Chaoses-Ib/IbPinyinLib" +license = "MIT" +keywords = ["pinyin", "cjk"] + [package] name = "ib-pinyin" -version = "0.1.0" edition = "2021" +version.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/README.md b/README.md index 4ea1bbf..b7321b4 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,25 @@ # IbPinyinLib ## 语言 -- [C++ 实现](C++/README.md)(停止维护) +### Rust +```rust +use ib_pinyin::{matcher::PinyinMatcher, pinyin::PinyinNotation}; + +let matcher = PinyinMatcher::builder("pysousuoeve") + .pinyin_notations(PinyinNotation::Ascii | PinyinNotation::AsciiFirstLetter) + .build(); +assert!(matcher.is_match("拼音搜索Everything")); +``` + +### C +```c +#include +#include + +bool is_match = ib_pinyin_is_match_u8c(u8"pysousuoeve", u8"拼音搜索Everything", PINYIN_NOTATION_ASCII_FIRST_LETTER | PINYIN_NOTATION_ASCII); +``` + +### C++ +[原实现](C++/README.md)(停止维护) ## 相关项目 - [IbEverythingExt: Everything 拼音搜索、快速选择扩展](https://github.com/Chaoses-Ib/IbEverythingExt) diff --git a/bindings/c/Cargo.toml b/bindings/c/Cargo.toml new file mode 100644 index 0000000..693ad42 --- /dev/null +++ b/bindings/c/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "ib-pinyin-c" +edition = "2021" +version.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +crate-type = ["staticlib", "cdylib"] + +[dependencies] +diplomat = "0.7.0" +diplomat-runtime = "0.7.0" +ib-pinyin = { path = "../..", features = ["minimal"] } diff --git a/bindings/c/README.md b/bindings/c/README.md new file mode 100644 index 0000000..f2c5c11 --- /dev/null +++ b/bindings/c/README.md @@ -0,0 +1,6 @@ +# IbPinyinLib.C +```sh +diplomat-tool c bindings/c/include/ib_pinyin -e bindings/c/src/lib.rs +``` + +Manually update: `notation.h` \ No newline at end of file diff --git a/bindings/c/examples/cmake/.gitignore b/bindings/c/examples/cmake/.gitignore new file mode 100644 index 0000000..1ac571b --- /dev/null +++ b/bindings/c/examples/cmake/.gitignore @@ -0,0 +1,3 @@ +.vs/ + +out/ \ No newline at end of file diff --git a/bindings/c/examples/cmake/CMakeLists.txt b/bindings/c/examples/cmake/CMakeLists.txt new file mode 100644 index 0000000..f5d7852 --- /dev/null +++ b/bindings/c/examples/cmake/CMakeLists.txt @@ -0,0 +1,18 @@ +# CMakeList.txt : CMake project for cmake, include source and define +# project specific logic here. +# +cmake_minimum_required(VERSION 3.8) + +project("cmake") + +# Add source to this project's executable. +add_executable(cmake "main.c") + +target_include_directories(cmake PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../include) + +# target_link_libraries(cmake PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../target/debug/ib_pinyin_c.lib) +target_link_libraries(cmake PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../target/debug/ib_pinyin_c.dll.lib) +add_custom_command(TARGET cmake POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${CMAKE_CURRENT_SOURCE_DIR}/../../../../target/debug/ib_pinyin_c.dll" + $) diff --git a/bindings/c/examples/cmake/CMakePresets.json b/bindings/c/examples/cmake/CMakePresets.json new file mode 100644 index 0000000..abf4065 --- /dev/null +++ b/bindings/c/examples/cmake/CMakePresets.json @@ -0,0 +1,61 @@ +{ + "version": 3, + "configurePresets": [ + { + "name": "windows-base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/out/build/${presetName}", + "installDir": "${sourceDir}/out/install/${presetName}", + "cacheVariables": { + "CMAKE_C_COMPILER": "cl.exe", + "CMAKE_CXX_COMPILER": "cl.exe" + }, + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Windows" + } + }, + { + "name": "x64-debug", + "displayName": "x64 Debug", + "inherits": "windows-base", + "architecture": { + "value": "x64", + "strategy": "external" + }, + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug" + } + }, + { + "name": "x64-release", + "displayName": "x64 Release", + "inherits": "x64-debug", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release" + } + }, + { + "name": "x86-debug", + "displayName": "x86 Debug", + "inherits": "windows-base", + "architecture": { + "value": "x86", + "strategy": "external" + }, + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug" + } + }, + { + "name": "x86-release", + "displayName": "x86 Release", + "inherits": "x86-debug", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release" + } + } + ] +} diff --git a/bindings/c/examples/cmake/main.c b/bindings/c/examples/cmake/main.c new file mode 100644 index 0000000..d6ae40d --- /dev/null +++ b/bindings/c/examples/cmake/main.c @@ -0,0 +1,18 @@ +#include +#include +#include +#include + +int main() +{ + const char *pattern = u8"pysousuoeve"; + const char *haystack = u8"拼音搜索Everything"; + // 0x3 + const PinyinNotation notations = PINYIN_NOTATION_ASCII_FIRST_LETTER | PINYIN_NOTATION_ASCII; + + printf("%d\n", ib_pinyin_is_match_u8(pattern, strlen(pattern), haystack, strlen(haystack), notations)); + + printf("%d\n", ib_pinyin_is_match_u8c(pattern, haystack, notations)); + + return 0; +} diff --git a/bindings/c/include/ib_pinyin/diplomat_runtime.h b/bindings/c/include/ib_pinyin/diplomat_runtime.h new file mode 100644 index 0000000..de0f9c7 --- /dev/null +++ b/bindings/c/include/ib_pinyin/diplomat_runtime.h @@ -0,0 +1,70 @@ +#ifndef DIPLOMAT_RUNTIME_C_H +#define DIPLOMAT_RUNTIME_C_H + +#include +#include +#include +#include + +// uchar.h doesn't always exist, but char32_t is always available +// in C++ anyway +#ifndef __cplusplus +#ifdef __APPLE__ +#include +typedef uint16_t char16_t; +typedef uint32_t char32_t; +#else +#include +#endif +#endif + + +#ifdef __cplusplus +namespace capi { +extern "C" { +#endif + +typedef struct DiplomatWriteable { + void* context; + char* buf; + size_t len; + size_t cap; + void (*flush)(struct DiplomatWriteable*); + bool (*grow)(struct DiplomatWriteable*, size_t); +} DiplomatWriteable; + +DiplomatWriteable diplomat_simple_writeable(char* buf, size_t buf_size); + +typedef struct DiplomatStringView { + const char* data; + size_t len; +} DiplomatStringView; + +#define MAKE_SLICE_VIEW(name, c_ty) \ + typedef struct Diplomat##name##View { \ + const c_ty* data; \ + size_t len; \ + } Diplomat##name##View; + +MAKE_SLICE_VIEW(I8, int8_t) +MAKE_SLICE_VIEW(U8, uint8_t) +MAKE_SLICE_VIEW(I16, int16_t) +MAKE_SLICE_VIEW(U16, uint16_t) +MAKE_SLICE_VIEW(I32, int32_t) +MAKE_SLICE_VIEW(U32, uint32_t) +MAKE_SLICE_VIEW(I64, int64_t) +MAKE_SLICE_VIEW(U64, uint64_t) +MAKE_SLICE_VIEW(Isize, intptr_t) +MAKE_SLICE_VIEW(Usize, size_t) +MAKE_SLICE_VIEW(F32, float) +MAKE_SLICE_VIEW(F64, double) +MAKE_SLICE_VIEW(Bool, bool) +MAKE_SLICE_VIEW(Char, char32_t) + + +#ifdef __cplusplus +} // extern "C" +} // namespace capi +#endif + +#endif diff --git a/bindings/c/include/ib_pinyin/ib_pinyin.h b/bindings/c/include/ib_pinyin/ib_pinyin.h new file mode 100644 index 0000000..2bb294c --- /dev/null +++ b/bindings/c/include/ib_pinyin/ib_pinyin.h @@ -0,0 +1,31 @@ +#ifndef ib_pinyin_H +#define ib_pinyin_H +#include +#include +#include +#include +#include "diplomat_runtime.h" + +#ifdef __cplusplus +namespace capi { +#endif + +typedef struct ib_pinyin ib_pinyin; +#ifdef __cplusplus +} // namespace capi +#endif +#ifdef __cplusplus +namespace capi { +extern "C" { +#endif + +bool ib_pinyin_is_match_u8(const char* pattern_data, size_t pattern_len, const char* haystack_data, size_t haystack_len, uint32_t pinyin_notations); + +bool ib_pinyin_is_match_u8c(const uint8_t* pattern, const uint8_t* haystack, uint32_t pinyin_notations); +void ib_pinyin_destroy(ib_pinyin* self); + +#ifdef __cplusplus +} // extern "C" +} // namespace capi +#endif +#endif diff --git a/bindings/c/include/ib_pinyin/notation.h b/bindings/c/include/ib_pinyin/notation.h new file mode 100644 index 0000000..d251937 --- /dev/null +++ b/bindings/c/include/ib_pinyin/notation.h @@ -0,0 +1,59 @@ +#pragma once +#include + +/// All pinyin notations are in lower case (`py.to_lowercase() == py`). +typedef uint32_t PinyinNotation; + +/// e.g. "pīn", "yīn" +#define PINYIN_NOTATION_UNICODE 0x8 + +/// 全拼 +/// +/// e.g. "pin", "yin" +/// +/// See [全拼](https://zh.wikipedia.org/wiki/全拼) for details. +#define PINYIN_NOTATION_ASCII 0x2 + +/// 带声调全拼 +/// +/// The tone digit is in `1..=5`. See [tones](https://en.wikipedia.org/wiki/Pinyin#Tones) for details. +/// +/// e.g. "pin1", "yin1" +#define PINYIN_NOTATION_ASCII_TONE 0x4 + +/// 简拼 +/// +/// e.g. "p", "y" +/// +/// See [简拼](https://zh.wikipedia.org/wiki/简拼) for details. +#define PINYIN_NOTATION_ASCII_FIRST_LETTER 0x1 + +/// 智能 ABC 双拼 +/// +/// See [智能ABC输入法](https://zh.wikipedia.org/wiki/智能ABC输入法#双拼方案) for details. +#define PINYIN_NOTATION_DILETTER_ABC 0x10 + +/// 拼音加加双拼 +/// +/// See [拼音加加](https://zh.wikipedia.org/wiki/拼音加加#双拼方案) for details. +#define PINYIN_NOTATION_DiletterJiajia 0x20 + +/// 微软双拼 +/// +/// See [微软拼音输入法](https://zh.wikipedia.org/wiki/微软拼音输入法#双拼方案) for details. +#define PINYIN_NOTATION_DiletterMicrosoft 0x40 + +/// 华宇双拼(紫光双拼) +/// +/// See [华宇拼音输入法](https://zh.wikipedia.org/wiki/华宇拼音输入法#双拼方案) for details. +#define PINYIN_NOTATION_DiletterThunisoft 0x80 + +/// 小鹤双拼 +/// +/// See [小鹤双拼](https://flypy.com/) for details. +#define PINYIN_NOTATION_DiletterXiaohe 0x100 + +/// 自然码双拼 +/// +/// See [自然码](https://zh.wikipedia.org/zh-cn/自然码) for details. +#define PINYIN_NOTATION_DiletterZrm 0x200 \ No newline at end of file diff --git a/bindings/c/src/lib.rs b/bindings/c/src/lib.rs new file mode 100644 index 0000000..35c21ce --- /dev/null +++ b/bindings/c/src/lib.rs @@ -0,0 +1,34 @@ +//! TODO: winres + +#[diplomat::bridge] +mod ffi { + use std::ffi::CStr; + + use ::ib_pinyin::{minimal, pinyin::PinyinNotation}; + + /// https://github.com/rust-diplomat/diplomat/issues/392 + #[allow(non_camel_case_types)] + #[diplomat::opaque] + pub struct ib_pinyin; + + impl ib_pinyin { + pub fn is_match_u8(pattern: &str, haystack: &str, pinyin_notations: u32) -> bool { + minimal::is_pinyin_match( + pattern, + haystack, + PinyinNotation::from_bits_truncate(pinyin_notations), + ) + } + + pub fn is_match_u8c(pattern: &u8, haystack: &u8, pinyin_notations: u32) -> bool { + (|| -> Result { + Ok(Self::is_match_u8( + unsafe { CStr::from_ptr(pattern as *const _ as *const i8) }.to_str()?, + unsafe { CStr::from_ptr(haystack as *const _ as *const i8) }.to_str()?, + pinyin_notations, + )) + })() + .unwrap_or(false) + } + } +} diff --git a/src/matcher.rs b/src/matcher.rs index 819c152..fc92efb 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -141,6 +141,7 @@ impl<'a> PinyinMatcherBuilder<'a> { /// TODO: No-pinyin pattern optimization /// TODO: Match Ascii only after AsciiFirstLetter; get_pinyins_and_for_each /// TODO: Anchors, `*_at` +/// TODO: UTF-16 and UCS-4 /// TODO: Unicode normalization /// TODO: Tail-call optimization /// TODO: No-hanzi haystack optimization (0.2/0.9%) diff --git a/src/minimal.rs b/src/minimal.rs index 779eef6..2f8fd07 100644 --- a/src/minimal.rs +++ b/src/minimal.rs @@ -7,7 +7,7 @@ use crate::{ pinyin::{PinyinData, PinyinNotation}, }; -fn pinyin_data() -> &'static PinyinData { +pub fn pinyin_data() -> &'static PinyinData { static PINYIN_DATA: OnceLock = OnceLock::new(); PINYIN_DATA.get_or_init(|| PinyinData::new(PinyinNotation::empty())) }