forked from andrewprock/ustl
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathuutility.h
440 lines (391 loc) · 13.4 KB
/
uutility.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
// This file is part of the uSTL library, an STL implementation.
//
// Copyright (c) 2005 by Mike Sharov <[email protected]>
// This file is free software, distributed under the MIT License.
//
/// \file uutility.h
/// \brief Utility templates.
#pragma once
#include "utypes.h"
#include "traits.h"
#if HAVE_CPP11
#include "uttraits.h"
#endif
#include "ulimits.h"
#include <assert.h>
namespace ustl {
#if __GNUC__
/// Returns the number of elements in a static vector
#define VectorSize(v) (sizeof(v) / sizeof(*v))
#else
// Old compilers will not be able to evaluate *v on an empty vector.
// The tradeoff here is that VectorSize will not be able to measure arrays of local structs.
#define VectorSize(v) (sizeof(v) / ustl::size_of_elements(1, v))
#endif
/// Returns the end() for a static vector
template <typename T, size_t N> inline constexpr T* VectorEnd (T(&a)[N]) { return &a[N]; }
/// Expands into a ptr,size expression for the given static vector; useful as link arguments.
#define VectorBlock(v) &(v)[0], VectorSize(v)
/// Expands into a begin,end expression for the given static vector; useful for algorithm arguments.
#define VectorRange(v) &(v)[0], VectorEnd(v)
/// Returns the number of bits in the given type
#define BitsInType(t) (sizeof(t) * CHAR_BIT)
/// Returns the mask of type \p t with the lowest \p n bits set.
#define BitMask(t,n) (t(~t(0)) >> (BitsInType(t) - (n)))
/// Argument that is used only in debug builds (as in an assert)
#ifndef NDEBUG
#define DebugArg(x) x
#else
#define DebugArg(x)
#endif
/// Shorthand for container iteration.
#define foreach(type,i,ctr) for (type i = (ctr).begin(); i != (ctr).end(); ++ i)
/// Shorthand for container reverse iteration.
#define eachfor(type,i,ctr) for (type i = (ctr).rbegin(); i != (ctr).rend(); ++ i)
#ifndef DOXYGEN_SHOULD_SKIP_THIS
// Macro for passing template types as macro arguments.
// These are deprecated. Use metamac macros instead. Will remove by next release.
#define TEMPLATE_FULL_DECL1(d1,t1) template <d1 t1>
#define TEMPLATE_FULL_DECL2(d1,t1,d2,t2) template <d1 t1, d2 t2>
#define TEMPLATE_FULL_DECL3(d1,t1,d2,t2,d3,t3) template <d1 t1, d2 t2, d3 t3>
#define TEMPLATE_DECL1(t1) TEMPLATE_FULL_DECL1(typename,t1)
#define TEMPLATE_DECL2(t1,t2) TEMPLATE_FULL_DECL2(typename,t1,typename,t2)
#define TEMPLATE_DECL3(t1,t2,t3) TEMPLATE_FULL_DECL3(typename,t1,typename,t2,typename,t3)
#define TEMPLATE_TYPE1(type,a1) type<a1>
#define TEMPLATE_TYPE2(type,a1,a2) type<a1,a2>
#define TEMPLATE_TYPE3(type,a1,a2,a3) type<a1,a2,a3>
#endif
/// Returns the minimum of \p a and \p b
template <typename T1, typename T2>
inline constexpr T1 min (const T1& a, const T2& b)
{
return a < b ? a : b;
}
/// Returns the maximum of \p a and \p b
template <typename T1, typename T2>
inline constexpr T1 max (const T1& a, const T2& b)
{
return b < a ? a : b;
}
/// Indexes into a static array with bounds limit
template <typename T, size_t N>
inline constexpr T& VectorElement (T(&v)[N], size_t i) { return v[min(i,N-1)]; }
/// The alignment performed by default.
const size_t c_DefaultAlignment = __alignof__(void*);
/// \brief Rounds \p n up to be divisible by \p grain
template <typename T>
inline constexpr T AlignDown (T n, size_t grain = c_DefaultAlignment)
{ return n - n % grain; }
/// \brief Rounds \p n up to be divisible by \p grain
template <typename T>
inline constexpr T Align (T n, size_t grain = c_DefaultAlignment)
{ return AlignDown (n + grain - 1, grain); }
/// Returns a nullptr pointer cast to T.
template <typename T>
inline constexpr T* NullPointer (void)
{ return nullptr; }
/// \brief Returns a non-dereferentiable value reference.
/// This is useful for passing to stream_align_of or the like which need a value but
/// don't need to actually use it.
template <typename T>
inline constexpr T& NullValue (void)
{ return *NullPointer<T>(); }
#ifndef DOXYGEN_SHOULD_SKIP_THIS
// Offsets a pointer
template <typename T>
inline T advance_ptr (T i, ptrdiff_t offset)
{ return i + offset; }
// Offsets a void pointer
template <>
inline const void* advance_ptr (const void* p, ptrdiff_t offset)
{
assert (p || !offset);
return reinterpret_cast<const uint8_t*>(p) + offset;
}
// Offsets a void pointer
template <>
inline void* advance_ptr (void* p, ptrdiff_t offset)
{
assert (p || !offset);
return reinterpret_cast<uint8_t*>(p) + offset;
}
#endif
/// Offsets an iterator
template <typename T, typename Distance>
inline T advance (T i, Distance offset)
{ return advance_ptr (i, offset); }
/// Returns the difference \p p1 - \p p2
template <typename T1, typename T2>
inline constexpr ptrdiff_t distance (T1 i1, T2 i2)
{
return i2 - i1;
}
#ifndef DOXYGEN_SHOULD_SKIP_THIS
#define UNVOID_DISTANCE(T1const,T2const) \
template <> inline constexpr ptrdiff_t distance (T1const void* p1, T2const void* p2) \
{ return (T2const uint8_t*)(p2) - (T1const uint8_t*)(p1); }
UNVOID_DISTANCE(,)
UNVOID_DISTANCE(const,const)
UNVOID_DISTANCE(,const)
UNVOID_DISTANCE(const,)
#undef UNVOID_DISTANCE
#endif
#ifndef DOXYGEN_SHOULD_SKIP_THIS
// The compiler issues a warning if an unsigned type is compared to 0.
template <typename T, bool IsSigned> struct __is_negative { inline constexpr bool operator()(const T& v) { return v < 0; } };
template <typename T> struct __is_negative<T,false> { inline constexpr bool operator()(const T&) { return false; } };
/// Warning-free way to check if \p v is negative, even if for unsigned types.
template <typename T> inline constexpr bool is_negative (const T& v) { return __is_negative<T,numeric_limits<T>::is_signed>()(v); }
#endif
/// \brief Returns the absolute value of \p v
/// Unlike the stdlib functions, this is inline and works with all types.
template <typename T>
inline constexpr T absv (T v)
{
return is_negative(v) ? -v : v;
}
/// \brief Returns -1 for negative values, 1 for positive, and 0 for 0
template <typename T>
inline constexpr T sign (T v)
{
return (0 < v) - is_negative(v);
}
/// Returns the absolute value of the distance i1 and i2
template <typename T1, typename T2>
inline constexpr size_t abs_distance (T1 i1, T2 i2)
{
return absv (distance(i1, i2));
}
/// Returns the size of \p n elements of size \p T
template <typename T>
inline constexpr size_t size_of_elements (size_t n, const T*)
{
return n * sizeof(T);
}
// Defined in byteswap.h, which is usually unusable.
#undef bswap_16
#undef bswap_32
#undef bswap_64
inline uint16_t bswap_16 (uint16_t v)
{
#if CPU_HAS_CMPXCHG8 // If it has that, it has bswap.
if (!__builtin_constant_p(v)) asm ("rorw $8, %0":"+r"(v)); else
#endif
v = v << 8 | v >> 8;
return v;
}
inline uint32_t bswap_32 (uint32_t v)
{
#if CPU_HAS_CMPXCHG8
if (!__builtin_constant_p(v)) asm ("bswap %0":"+r"(v)); else
#endif
v = v << 24 | (v & 0xFF00) << 8 | ((v >> 8) & 0xFF00) | v >> 24;
return v;
}
#if HAVE_INT64_T
inline uint64_t bswap_64 (uint64_t v)
{
#if x86_64
if (!__builtin_constant_p(v)) asm ("bswap %0":"+r"(v)); else
#endif
v = (uint64_t(bswap_32(v)) << 32) | bswap_32(v >> 32);
return v;
}
#endif
/// \brief Swaps the byteorder of \p v.
template <typename T>
inline T bswap (const T& v)
{
switch (BitsInType(T)) {
default: return v;
case 16: return T (bswap_16 (uint16_t (v)));
case 32: return T (bswap_32 (uint32_t (v)));
#if HAVE_INT64_T
case 64: return T (bswap_64 (uint64_t (v)));
#endif
}
}
#if BYTE_ORDER == BIG_ENDIAN
template <typename T> inline T le_to_native (const T& v) { return bswap (v); }
template <typename T> inline T be_to_native (const T& v) { return v; }
template <typename T> inline T native_to_le (const T& v) { return bswap (v); }
template <typename T> inline T native_to_be (const T& v) { return v; }
#elif BYTE_ORDER == LITTLE_ENDIAN
template <typename T> inline T le_to_native (const T& v) { return v; }
template <typename T> inline T be_to_native (const T& v) { return bswap (v); }
template <typename T> inline T native_to_le (const T& v) { return v; }
template <typename T> inline T native_to_be (const T& v) { return bswap (v); }
#endif // BYTE_ORDER
/// Deletes \p p and sets it to nullptr
template <typename T>
inline void Delete (T*& p)
{
delete p;
p = nullptr;
}
/// Deletes \p p as an array and sets it to nullptr
template <typename T>
inline void DeleteVector (T*& p)
{
delete [] p;
p = nullptr;
}
/// Template of making != from ! and ==
template <typename T>
inline constexpr bool operator!= (const T& x, const T& y)
{ return !(x == y); }
/// Template of making > from <
template <typename T>
inline constexpr bool operator> (const T& x, const T& y)
{ return y < x; }
/// Template of making <= from < and ==
template <typename T>
inline constexpr bool operator<= (const T& x, const T& y)
{ return !(y < x); }
/// Template of making >= from < and ==
template <typename T>
inline constexpr bool operator>= (const T& x, const T& y)
{ return !(x < y); }
/// Packs \p s multiple times into \p b. Useful for loop unrolling.
template <typename TSmall, typename TBig>
inline void pack_type (TSmall s, TBig& b)
{
b = s;
for (unsigned h = BitsInType(TSmall); h < BitsInType(TBig); h *= 2)
b = (b << h)|b;
}
/// \brief Divides \p n1 by \p n2 and rounds the result up.
/// This is in contrast to regular division, which rounds down.
template <typename T1, typename T2>
inline T1 DivRU (T1 n1, T2 n2)
{
T2 adj = n2 - 1;
if (is_negative (n1))
adj = -adj;
return (n1 + adj) / n2;
}
/// Sets the contents of \p pm to 1 and returns true if the previous value was 0.
inline bool TestAndSet (int* pm)
{
#if CPU_HAS_CMPXCHG8
bool rv;
int oldVal (1);
asm volatile ( // cmpxchg compares to %eax and swaps if equal
"cmpxchgl %3, %1\n\t"
"sete %0"
: "=a" (rv), "=m" (*pm), "=r" (oldVal)
: "2" (oldVal), "a" (0)
: "memory");
return rv;
#elif __i386__ || __x86_64__
int oldVal (1);
asm volatile ("xchgl %0, %1" : "=r"(oldVal), "=m"(*pm) : "0"(oldVal), "m"(*pm) : "memory");
return !oldVal;
#elif __sparc32__ // This has not been tested
int rv;
asm volatile ("ldstub %1, %0" : "=r"(rv), "=m"(*pm) : "m"(pm));
return !rv;
#else
const int oldVal (*pm);
*pm = 1;
return !oldVal;
#endif
}
/// Returns the index of the first set bit in \p v or \p nbv if none.
inline uoff_t FirstBit (uint32_t v, uoff_t nbv)
{
uoff_t n = nbv;
#if __i386__ || __x86_64__
if (!__builtin_constant_p(v)) asm ("bsr\t%1, %k0":"+r,r"(n):"r,m"(v)); else
#endif
#if __GNUC__
if (v) n = 31 - __builtin_clz(v);
#else
if (v) for (uint32_t m = uint32_t(1)<<(n=31); !(v & m); m >>= 1) --n;
#endif
return n;
}
/// Returns the index of the first set bit in \p v or \p nbv if none.
inline uoff_t FirstBit (uint64_t v, uoff_t nbv)
{
uoff_t n = nbv;
#if __x86_64__
if (!__builtin_constant_p(v)) asm ("bsr\t%1, %0":"+r,r"(n):"r,m"(v)); else
#endif
#if __GNUC__ && SIZE_OF_LONG >= 8
if (v) n = 63 - __builtin_clzl(v);
#elif __GNUC__ && HAVE_LONG_LONG && SIZE_OF_LONG_LONG >= 8
if (v) n = 63 - __builtin_clzll(v);
#else
if (v) for (uint64_t m = uint64_t(1)<<(n=63); !(v & m); m >>= 1) --n;
#endif
return n;
}
/// Returns the next power of 2 >= \p v.
/// Values larger than UINT32_MAX/2 will return 2^0
inline uint32_t NextPow2 (uint32_t v)
{
uint32_t r = v-1;
#if __i386__ || __x86_64__
if (!__builtin_constant_p(r)) asm("bsr\t%0, %0":"+r"(r)); else
#endif
{ r = FirstBit(r,r); if (r >= BitsInType(r)-1) r = uint32_t(-1); }
return 1<<(1+r);
}
/// Bitwise rotate value left
template <typename T>
inline T Rol (T v, size_t n)
{
#if __i386__ || __x86_64__
if (!(__builtin_constant_p(v) && __builtin_constant_p(n))) asm("rol\t%b1, %0":"+r,r"(v):"i,c"(n)); else
#endif
v = (v << n) | (v >> (BitsInType(T)-n));
return v;
}
/// Bitwise rotate value right
template <typename T>
inline T Ror (T v, size_t n)
{
#if __i386__ || __x86_64__
if (!(__builtin_constant_p(v) && __builtin_constant_p(n))) asm("ror\t%b1, %0":"+r,r"(v):"i,c"(n)); else
#endif
v = (v >> n) | (v << (BitsInType(T)-n));
return v;
}
/// \brief This template is to be used for dereferencing a type-punned pointer without a warning.
///
/// When casting a local variable to an unrelated type through a pointer (for
/// example, casting a float to a uint32_t without conversion), the resulting
/// memory location can be accessed through either pointer, which violates the
/// strict aliasing rule. While -fno-strict-aliasing option can be given to
/// the compiler, eliminating this warning, inefficient code may result in
/// some instances, because aliasing inhibits some optimizations. By using
/// this template, and by ensuring the memory is accessed in one way only,
/// efficient code can be produced without the warning. For gcc 4.1.0+.
///
template <typename DEST, typename SRC>
inline DEST noalias (const DEST&, SRC* s)
{
asm("":"+g"(s)::"memory");
union UPun { SRC s; DEST d; };
return ((UPun*)(s))->d;
}
template <typename DEST, typename SRC>
inline DEST noalias_cast (SRC s)
{
asm("":"+g"(s)::"memory");
union { SRC s; DEST d; } u = {s};
return u.d;
}
namespace simd {
/// Call after you are done using SIMD algorithms for 64 bit tuples.
#define ALL_MMX_REGS_CHANGELIST "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7","st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)"
#if CPU_HAS_3DNOW
inline void reset_mmx (void) { asm ("femms":::ALL_MMX_REGS_CHANGELIST); }
#elif CPU_HAS_MMX
inline void reset_mmx (void) { asm ("emms":::ALL_MMX_REGS_CHANGELIST); }
#else
inline void reset_mmx (void) {}
#endif
} // namespace simd
} // namespace ustl