forked from cmus/cmus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuchar.h
267 lines (235 loc) · 6.95 KB
/
uchar.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
/*
* Copyright 2008-2013 Various Authors
* Copyright 2004-2005 Timo Hirvonen
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef CMUS_UCHAR_H
#define CMUS_UCHAR_H
#include <stddef.h> /* size_t */
#include <stdbool.h>
typedef unsigned int uchar;
extern const char hex_tab[16];
/*
* Invalid bytes are or'ed with this
* for example 0xff -> 0x100000ff
*/
#define U_INVALID_MASK 0x10000000U
/*
 * @uch  candidate code point
 *
 * Returns 1 when @uch does not exceed U+10FFFF, 0 otherwise.
 * Only the upper bound is checked.
 */
static inline int u_is_unicode(uchar uch)
{
	const uchar last_code_point = 0x0010ffffU;

	if (uch > last_code_point) {
		return 0;
	}
	return 1;
}
/*
 * @uch  character value to measure
 *
 * Returns the size of @uch in bytes: 1 up to 0x7f, 2 up to 0x7ff,
 * 3 up to 0xffff and 4 up to 0x10ffff.  Any value above 0x10ffff is
 * reported as 1 byte.
 */
static inline int u_char_size(uchar uch)
{
	/* values past the 4-byte range are counted as one byte */
	if (uch > 0x0010ffffU) {
		return 1;
	}
	if (uch > 0x0000ffffU) {
		return 4;
	}
	if (uch > 0x000007ffU) {
		return 3;
	}
	if (uch > 0x0000007fU) {
		return 2;
	}
	return 1;
}
/*
* Returns width of @uch (normally 1 or 2, 4 for invalid chars (<xx>))
*/
int u_char_width(uchar uch);
/*
* @str any null-terminated string
*
* Returns 1 if @str is valid UTF-8 string, 0 otherwise.
*/
int u_is_valid(const char *str);
/*
 * Table defined elsewhere.  u_next_char() indexes it with the first byte
 * of a character and advances by the value found there.
 */
extern const char * const utf8_skip;

/*
 * @str  valid, null-terminated UTF-8 string
 *
 * Returns position of the next unicode character in @str.
 * The const qualifier of @str is cast away in the returned pointer.
 */
static inline char *u_next_char(const char *str)
{
	/* read the first byte as unsigned so it is a non-negative table index */
	const unsigned char lead = *(const unsigned char *) str;

	return (char *) (str + utf8_skip[lead]);
}
/*
* @str valid, null-terminated UTF-8 string
*
* Returns length of @str in UTF-8 characters.
*/
size_t u_strlen(const char *str);
/*
* @str null-terminated UTF-8 string
*
* Returns length of @str in UTF-8 characters.
* Invalid chars are counted as single characters.
*/
size_t u_strlen_safe(const char *str);
/*
* @str null-terminated UTF-8 string
*
* Returns width of @str.
*/
int u_str_width(const char *str);
/*
* @uch unicode character
*
* Returns size of @uch if it were printed.
*/
int u_print_size(uchar uch);
/*
* @str null-terminated UTF-8 string
*
* Returns size of @str if it were printed.
*/
int u_str_print_size(const char *str);
/*
* @str null-terminated UTF-8 string
* @len number of characters to measure
*
* Returns width of the first @len characters in @str.
*/
int u_str_nwidth(const char *str, int len);
/*
* @str null-terminated UTF-8 string
* @uch unicode character
*
* Returns a pointer to the first occurrence of @uch in @str.
*/
char *u_strchr(const char *str, uchar uch);
/*
 * @str string that @idx refers to
 * @idx pointer to an index into @str
 *
 * NOTE(review): not documented in this header. Judging by the name and
 * the void return type, it presumably moves *@idx back to the start of
 * the previous character -- confirm against the implementation.
 */
void u_prev_char_pos(const char *str, int *idx);
/*
* @str null-terminated UTF-8 string
* @idx pointer to byte index in @str (not UTF-8 character index!) or NULL
*
* Returns unicode character at @str[*@idx] or @str[0] if @idx is NULL.
* Stores byte index of the next char back to @idx if set.
*/
uchar u_get_char(const char *str, int *idx);
/*
* @str destination buffer
* @idx pointer to byte index in @str (not UTF-8 character index!)
* @uch unicode character
*/
void u_set_char_raw(char *str, int *idx, uchar uch);
/*
 * NOTE(review): not documented separately in this header. It takes the
 * same parameters as u_set_char_raw() except that @idx is a size_t *
 * rather than an int *; how the two differ in behavior must be
 * confirmed against the implementation.
 */
void u_set_char(char *str, size_t *idx, uchar uch);
/*
* @dst destination buffer
* @src null-terminated UTF-8 string
* @width how much to copy (at most)
*
* Copies at most @width columns, less if null byte was hit.
* Null byte is _never_ copied.
* Remaining width is stored to @width.
*
* Returns number of _bytes_ copied.
*/
size_t u_copy_chars(char *dst, const char *src, int *width);
/*
* @dst destination buffer
* @src null-terminated UTF-8 string
* @len how many bytes are available in @dst
*
* Copies at most @len bytes, less if null byte was hit. Replaces every
* non-ascii character by '?'. Null byte is _never_ copied.
*
* Returns number of bytes written to @dst.
*/
int u_to_ascii(char *dst, const char *src, int len);
/*
* @dst destination buffer
* @src null-terminated string
*
* Copies src into dst, changing all invalid utf8 bytes into <xx>,
* where xx is the value of the byte in hex.
*
* Expects dst to be large enough to fit src + the conversions.
*/
void u_to_utf8(char *dst, const char *src);
/*
* @str null-terminated UTF-8 string, must be long enough
* @width how much to skip
* @overskip skip a final wide character even when it overshoots @width
*
* Skips @width columns in a UTF-8 string.
* Underskip (positive) or overskip (negative) is stored to @width.
*
* Returns number of _bytes_ skipped.
*/
int u_skip_chars(const char *str, int *width, bool overskip);
/*
* @str valid null-terminated UTF-8 string
*
* Converts a string into a form that is independent of case.
*
* Returns a newly allocated string
*/
char *u_casefold(const char *str);
/*
* @str1 valid, normalized, null-terminated UTF-8 string
* @str2 valid, normalized, null-terminated UTF-8 string
*
* Returns 1 if @str1 is equal to @str2, ignoring the case of the characters.
*/
int u_strcase_equal(const char *str1, const char *str2);
/*
* @str1 valid, normalized, null-terminated UTF-8 string
* @str2 valid, normalized, null-terminated UTF-8 string
* @len number of characters to consider for comparison
*
* Returns 1 if the first @len characters of @str1 and @str2 are equal,
* ignoring the case of the characters (0 otherwise).
*/
int u_strncase_equal(const char *str1, const char *str2, size_t len);
/*
* @str1 valid, normalized, null-terminated UTF-8 string
* @str2 valid, normalized, null-terminated UTF-8 string
* @len number of characters to consider for comparison
*
* Like u_strncase_equal(), but uses only base characters for comparison
* (e.g. "Trentemöller" matches "Trentemøller")
*/
int u_strncase_equal_base(const char *str1, const char *str2, size_t len);
/*
* @haystack valid, normalized, null-terminated UTF-8 string
* @needle valid, normalized, null-terminated UTF-8 string
*
* Returns position of @needle in @haystack (case insensitive comparison).
*/
char *u_strcasestr(const char *haystack, const char *needle);
/*
* @haystack valid, normalized, null-terminated UTF-8 string
* @needle valid, normalized, null-terminated UTF-8 string
*
* Like u_strcasestr(), but uses only base characters for comparison
* (e.g. "Trentemöller" matches "Trentemøller")
*/
char *u_strcasestr_base(const char *haystack, const char *needle);
/*
* @haystack null-terminated string in local encoding
* @needle valid, normalized, null-terminated UTF-8 string
*
* Like u_strcasestr_base(), but converts @haystack to UTF-8 if necessary.
*/
char *u_strcasestr_filename(const char *haystack, const char *needle);
#endif