Skip to content

Commit

Permalink
Improve surrogate handling readability
Browse files Browse the repository at this point in the history
- add inline function to test and convert surrogates
  is_surrogate(c), is_hi_surrogate(c), is_lo_surrogate(c),
  get_hi_surrogate(c), get_lo_surrogate(c), from_surrogate(hi, lo)
- use names for BC header offsets and lengths in libregexp.c
- remove strict aliasing violations in `lre_exec_backtrack()`
- pass all context variables to XXX_CHAR macros in `lre_exec_backtrack()`
  • Loading branch information
chqrlie committed Feb 19, 2024
1 parent 8d932de commit 12c91df
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 124 deletions.
39 changes: 38 additions & 1 deletion cutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@
#ifndef countof
#define countof(x) (sizeof(x) / sizeof((x)[0]))
#endif

#ifndef container_of
/* return the pointer of type 'type *' containing 'ptr' as field 'member' */
#define container_of(ptr, type, member) ((type *)((uint8_t *)(ptr) - offsetof(type, member)))
#endif

typedef int BOOL;

Expand Down Expand Up @@ -207,17 +208,22 @@ static inline void put_u8(uint8_t *tab, uint8_t val)
*tab = val;
}

#ifndef bswap16
static inline uint16_t bswap16(uint16_t x)
{
return (x >> 8) | (x << 8);
}
#endif

#ifndef bswap32
static inline uint32_t bswap32(uint32_t v)
{
return ((v & 0xff000000) >> 24) | ((v & 0x00ff0000) >> 8) |
((v & 0x0000ff00) << 8) | ((v & 0x000000ff) << 24);
}
#endif

#ifndef bswap64
static inline uint64_t bswap64(uint64_t v)
{
return ((v & ((uint64_t)0xff << (7 * 8))) >> (7 * 8)) |
Expand All @@ -229,6 +235,7 @@ static inline uint64_t bswap64(uint64_t v)
((v & ((uint64_t)0xff << (1 * 8))) << (5 * 8)) |
((v & ((uint64_t)0xff << (0 * 8))) << (7 * 8));
}
#endif

/* XXX: should take an extra argument to pass slack information to the caller */
typedef void *DynBufReallocFunc(void *opaque, void *ptr, size_t size);
Expand Down Expand Up @@ -278,6 +285,36 @@ static inline void dbuf_set_error(DynBuf *s)
int unicode_to_utf8(uint8_t *buf, unsigned int c);
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);

static inline BOOL is_surrogate(uint32_t c)
{
return (c >> 11) == (0xD800 >> 11); // 0xD800-0xDFFF
}

static inline BOOL is_hi_surrogate(uint32_t c)
{
return (c >> 10) == (0xD800 >> 10); // 0xD800-0xDBFF
}

static inline BOOL is_lo_surrogate(uint32_t c)
{
return (c >> 10) == (0xDC00 >> 10); // 0xDC00-0xDFFF
}

static inline uint32_t get_hi_surrogate(uint32_t c)
{
return (c >> 10) - (0x10000 >> 10) + 0xD800;
}

static inline uint32_t get_lo_surrogate(uint32_t c)
{
return (c & 0x3FF) | 0xDC00;
}

static inline uint32_t from_surrogate(uint32_t hi, uint32_t lo)
{
return 0x10000 + 0x400 * (hi - 0xD800) + (lo - 0xDC00);
}

static inline int from_hex(int c)
{
if (c >= '0' && c <= '9')
Expand Down
Loading

0 comments on commit 12c91df

Please sign in to comment.