-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtypes.rs
114 lines (96 loc) · 3.51 KB
/
types.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
//! Data types used in our public API.
use ffi::Encoding;
pub use self::Reliability::{Reliable, Unreliable};
/// The data formats that input text may be supplied in.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Format {
    /// Plain text: the input is processed as-is.
    Text,
    /// HTML: try to strip HTML tags and expand entities.
    Html,
}
/// Whether the output of the language decoder can be considered reliable.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Reliability {
    /// The decoder is reasonably confident about this guess.
    Reliable,
    /// The decoder does not have high confidence in this guess.
    Unreliable,
}
impl Reliability {
/// Construct from a boolean value.
pub fn from_bool(is_reliable: bool) -> Reliability {
if is_reliable { Reliable } else { Unreliable }
}
}
/// A language code, normally two letters for common languages.
///
/// This is a newtype around a `&'static str`; the wrapped code is available
/// as field `0`.  Values compare and hash by the wrapped string, so a `Lang`
/// can be used as a key in a `HashMap` or stored in a `HashSet`.
#[derive(PartialEq, Eq, Hash, Debug, Clone, Copy)]
pub struct Lang(pub &'static str);
/// Optional hints which the decoder will use to make better guesses.
///
/// Every field defaults to `None`, so the struct-update syntax can be used
/// to supply only the hints that are known.
///
/// # Examples
///
/// ```
/// use cld2::Hints;
///
/// // Specify just one hint.
/// let hints = Hints {
///     content_language: Some("en"),
///     ..Default::default()
/// };
/// assert_eq!(Some("en"), hints.content_language);
/// assert_eq!(None, hints.tld);
/// ```
#[derive(Default, Debug)]
pub struct Hints<'a> {
    /// A value taken from an HTTP Content-Language header.  For example,
    /// "fr,en" will bias the decoder towards French and English.
    pub content_language: Option<&'a str>,

    /// The top-level domain associated with this text.  For example, "fr"
    /// will bias the decoder towards French.
    pub tld: Option<&'a str>,

    /// EXPERIMENTAL: The encoding the text was in originally, before it
    /// was converted to UTF-8.  See `Encoding` for legal values.
    pub encoding: Option<Encoding>,

    /// An extra language hint.
    pub language: Option<Lang>,
}
/// Detailed information about how well the input text matched a specific
/// language.
///
/// Equality compares `normalized_score` using `f64` semantics, so two
/// scores holding NaN never compare equal.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct LanguageScore {
    /// The language matched.
    pub language: Option<Lang>,
    /// The percentage of the text which appears to be in this language.
    /// Between 0 and 100.
    pub percent: u8,
    /// Scores near 1.0 indicate a "normal" text for this language.  Scores
    /// further away from 1.0 indicate strange or atypical texts.
    pub normalized_score: f64,
}

/// Detailed language detection results.
///
/// Note: Do not rely on this struct containing only the fields listed
/// below.  It may gain extra fields in the future.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct DetectionResult {
    /// The language detected.
    pub language: Option<Lang>,
    /// The scores for the top 3 candidate languages.
    pub scores: [LanguageScore; 3],
    /// The number of bytes of actual text found, excluding tags, etc.
    pub text_bytes: i32,
    /// Is this guess reliable?
    pub reliability: Reliability,
    /// A private field to keep the user from being able to construct
    /// instances directly, so we can extend this struct without breaking
    /// the API.  There's probably a better way to do this.
    _dummy: (),
}
impl DetectionResult {
    /// EXPERIMENTAL: Build a `DetectionResult` from its public parts.
    ///
    /// Each argument is stored unchanged in the field of the same name.
    /// You generally don't need to call this directly.
    pub fn new(
        language: Option<Lang>,
        scores: [LanguageScore; 3],
        text_bytes: i32,
        reliability: Reliability,
    ) -> DetectionResult {
        DetectionResult {
            language,
            scores,
            text_bytes,
            reliability,
            // Private marker field; it carries no data.
            _dummy: (),
        }
    }
}