tuklib_integer.h
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
//
// Authors:    Lasse Collin
//             Joachim Henke
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"
#include <string.h>

// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
// and such functions.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
#	include <immintrin.h>
// Only include <intrin.h> when it is needed. GCC and Clang can both
// use built-ins, so we only need Windows intrinsics when using MSVC.
// GCC and Clang can set _MSC_VER on Windows, so we need to exclude these
// cases explicitly.
#elif defined(_MSC_VER) && !TUKLIB_GNUC_REQ(3, 4) && !defined(__clang__)
#	include <intrin.h>
#endif


///////////////////
// Byte swapping //
///////////////////

#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define byteswap16(num) __builtin_bswap16(num)
#	define byteswap32(num) __builtin_bswap32(num)
#	define byteswap64(num) __builtin_bswap64(num)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define byteswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define byteswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define byteswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>
#	define byteswap16(num) bswap16(num)
#	define byteswap32(num) bswap32(num)
#	define byteswap64(num) bswap64(num)

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define byteswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define byteswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define byteswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif

#ifndef byteswap16
#	define byteswap16(n) (uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	)
#endif

#ifndef byteswap32
#	define byteswap32(n) (uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	)
#endif

#ifndef byteswap64
#	define byteswap64(n) (uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	)
#endif
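
// Illustrative sketch (an addition, not upstream code): in the generic
// fallback macros each mask selects one byte and the shift moves it to its
// mirrored position, so the following holds regardless of host byte order:
/*
#include <assert.h>

static void byteswap_demo(void)
{
	assert(byteswap16((uint16_t)0x1234) == 0x3412);
	assert(byteswap32(UINT32_C(0x11223344)) == UINT32_C(0x44332211));
	assert(byteswap64(UINT64_C(0x1122334455667788))
			== UINT64_C(0x8877665544332211));
}
*/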

// Define conversion macros using the basic byte swapping macros.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) byteswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) byteswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) byteswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) byteswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) byteswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) byteswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif
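
// Illustration (an added sketch): the conv macros are identity casts when
// the host byte order already matches the wanted one and byte swaps
// otherwise. Because they are macros, conversion of constants can be
// folded at compile time.
/*
// On a little endian host:
uint32_t a = conv32be(UINT32_C(0x11223344)); // a == 0x44332211
uint32_t b = conv32le(UINT32_C(0x11223344)); // b == 0x11223344 (identity)
*/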


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// No-strict-align archs like x86-64
// ---------------------------------
//
// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because this kind
// of cast breaks strict aliasing rules and results in undefined behavior.
// With unaligned pointers it's even worse: compilers may emit vector
// instructions that require aligned pointers even if non-vector
// instructions work with unaligned pointers.
//
// Using memcpy() is the standard compliant way to do unaligned access.
// Many modern compilers inline it so there is no function call overhead.
// For those compilers that don't handle the memcpy() method well, the
// old casting method (that violates strict aliasing) can be requested at
// build time. A third method, casting to a packed struct, would also be
// an option but isn't provided to keep things simpler (it's already a mess).
// Hopefully this is flexible enough in practice.
//
// Some compilers on x86-64 like Clang >= 10 and GCC >= 5.1 detect that
//
//     buf[0] | (buf[1] << 8)
//
// reads a 16-bit value and can emit a single 16-bit load and produce
// code identical to the memcpy() method. In other cases Clang and GCC
// produce either the same or better code with memcpy(). For example,
// Clang 9 on x86-64 can detect the 32-bit load but not the 16-bit load.
//
// MSVC uses unaligned access with the memcpy() method but emits byte-by-byte
// code for "buf[0] | (buf[1] << 8)".
//
// Conclusion: The memcpy() method is the best choice when unaligned access
// is supported.
//
// Strict-align archs like SPARC
// -----------------------------
//
// GCC versions from around 4.x to at least 13.2.0 produce worse code
// from the memcpy() method than from simple byte-by-byte shift-or code
// when reading a 32-bit integer:
//
//     (1) The value may be constructed on the stack using four 8-bit loads,
//         four 8-bit stores to the stack, and finally one 32-bit load from
//         the stack.
//
//     (2) Especially with -Os, an actual memcpy() call may be emitted.
//
// This is true at least on ARM, ARM64, SPARC, SPARC64, MIPS64EL, and
// RISC-V. Of these, ARM, ARM64, and RISC-V support unaligned access in
// some processors but not all, so this is relevant only when GCC assumes
// that unaligned access is not supported or when -mstrict-align or
// -mno-unaligned-access is used.
//
// For Clang it makes little difference. ARM64 with -O2 -mstrict-align
// was one of the very few cases with a minor difference: the memcpy()
// version was one instruction longer.
//
// Conclusion: At least in the case of GCC and Clang, byte-by-byte code is
// the best choice for strict-align archs to do unaligned access.
//
// See also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111502
//
// Thanks to <https://godbolt.org/> it was easy to test different compilers.
// The following is for little endian targets:
/*
#include <stdint.h>
#include <string.h>

uint32_t bytes16(const uint8_t *b)
{
	return (uint32_t)b[0]
		| ((uint32_t)b[1] << 8);
}

uint32_t copy16(const uint8_t *b)
{
	uint16_t v;
	memcpy(&v, b, sizeof(v));
	return v;
}

uint32_t bytes32(const uint8_t *b)
{
	return (uint32_t)b[0]
		| ((uint32_t)b[1] << 8)
		| ((uint32_t)b[2] << 16)
		| ((uint32_t)b[3] << 24);
}

uint32_t copy32(const uint8_t *b)
{
	uint32_t v;
	memcpy(&v, b, sizeof(v));
	return v;
}

void wbytes16(uint8_t *b, uint16_t v)
{
	b[0] = (uint8_t)v;
	b[1] = (uint8_t)(v >> 8);
}

void wcopy16(uint8_t *b, uint16_t v)
{
	memcpy(b, &v, sizeof(v));
}

void wbytes32(uint8_t *b, uint32_t v)
{
	b[0] = (uint8_t)v;
	b[1] = (uint8_t)(v >> 8);
	b[2] = (uint8_t)(v >> 16);
	b[3] = (uint8_t)(v >> 24);
}

void wcopy32(uint8_t *b, uint32_t v)
{
	memcpy(b, &v, sizeof(v));
}
*/


#ifdef TUKLIB_FAST_UNALIGNED_ACCESS

static inline uint16_t
read16ne(const uint8_t *buf)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
read32ne(const uint8_t *buf)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
read64ne(const uint8_t *buf)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
read16be(const uint8_t *buf)
{
	uint16_t num = read16ne(buf);
	return conv16be(num);
}


static inline uint16_t
read16le(const uint8_t *buf)
{
	uint16_t num = read16ne(buf);
	return conv16le(num);
}


static inline uint32_t
read32be(const uint8_t *buf)
{
	uint32_t num = read32ne(buf);
	return conv32be(num);
}


static inline uint32_t
read32le(const uint8_t *buf)
{
	uint32_t num = read32ne(buf);
	return conv32le(num);
}


static inline uint64_t
read64be(const uint8_t *buf)
{
	uint64_t num = read64ne(buf);
	return conv64be(num);
}


static inline uint64_t
read64le(const uint8_t *buf)
{
	uint64_t num = read64ne(buf);
	return conv64le(num);
}


// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
#define write16be(buf, num) write16ne(buf, conv16be(num))
#define write32be(buf, num) write32ne(buf, conv32be(num))
#define write64be(buf, num) write64ne(buf, conv64be(num))
#define write16le(buf, num) write16ne(buf, conv16le(num))
#define write32le(buf, num) write32ne(buf, conv32le(num))
#define write64le(buf, num) write64ne(buf, conv64le(num))
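
// Usage sketch (an added illustration): with these macros the byte order in
// memory is fixed regardless of the host, and a constant argument lets the
// conversion fold at compile time.
/*
uint8_t out[4];
write32be(out, UINT32_C(0x11223344)); // out[] == { 0x11, 0x22, 0x33, 0x44 }
write32le(out, UINT32_C(0x11223344)); // out[] == { 0x44, 0x33, 0x22, 0x11 }
*/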

#else

#ifdef WORDS_BIGENDIAN
#	define read16ne read16be
#	define read32ne read32be
#	define read64ne read64be
#	define write16ne write16be
#	define write32ne write32be
#	define write64ne write64be
#else
#	define read16ne read16le
#	define read32ne read32le
#	define read64ne read64le
#	define write16ne write16le
#	define write32ne write32le
#	define write64ne write64le
#endif


static inline uint16_t
read16be(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
}


static inline uint16_t
read16le(const uint8_t *buf)
{
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
}


static inline uint32_t
read32be(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
}


static inline uint32_t
read32le(const uint8_t *buf)
{
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
}


static inline uint64_t
read64be(const uint8_t *buf)
{
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
}


static inline uint64_t
read64le(const uint8_t *buf)
{
	uint64_t num = (uint64_t)buf[0];
	num |= (uint64_t)buf[1] << 8;
	num |= (uint64_t)buf[2] << 16;
	num |= (uint64_t)buf[3] << 24;
	num |= (uint64_t)buf[4] << 32;
	num |= (uint64_t)buf[5] << 40;
	num |= (uint64_t)buf[6] << 48;
	num |= (uint64_t)buf[7] << 56;
	return num;
}


static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
	return;
}


static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	return;
}


static inline void
write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
	return;
}


static inline void
write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	return;
}


static inline void
write64be(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)(num >> 56);
	buf[1] = (uint8_t)(num >> 48);
	buf[2] = (uint8_t)(num >> 40);
	buf[3] = (uint8_t)(num >> 32);
	buf[4] = (uint8_t)(num >> 24);
	buf[5] = (uint8_t)(num >> 16);
	buf[6] = (uint8_t)(num >> 8);
	buf[7] = (uint8_t)num;
	return;
}


static inline void
write64le(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	buf[4] = (uint8_t)(num >> 32);
	buf[5] = (uint8_t)(num >> 40);
	buf[6] = (uint8_t)(num >> 48);
	buf[7] = (uint8_t)(num >> 56);
	return;
}

#endif


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

// Separate functions for aligned reads and writes are provided since on
// strict-align archs aligned access is much faster than unaligned access.
//
// Just like in the unaligned case, memcpy() is needed to avoid
// strict aliasing violations. However, on archs that don't support
// unaligned access the compiler cannot know that the pointers given
// to memcpy() are aligned, which results in slow code. As of C11 there is
// no standard way to tell the compiler that we know that the address is
// aligned, but some compilers have language extensions to do that. With
// such language extensions the memcpy() method gives excellent results.
//
// What to do on a strict-align system when no known language extensions
// are available? Falling back to byte-by-byte access would be safe but ruin
// optimizations that have been made specifically with aligned access in mind.
// As a compromise, aligned reads will fall back to non-compliant type punning
// but aligned writes will be byte-by-byte, that is, fast reads are preferred
// over fast writes. This obviously isn't great but hopefully it's a working
// compromise for now.
//
// __builtin_assume_aligned is supported by GCC >= 4.7 and Clang >= 3.6.
#ifdef HAVE___BUILTIN_ASSUME_ALIGNED
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, __builtin_assume_aligned(src, size), size)
#else
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, src, size)
#	ifndef TUKLIB_FAST_UNALIGNED_ACCESS
#		define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
#	endif
#endif
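
// Added sketch of what __builtin_assume_aligned() buys (a hypothetical
// function, not part of the original header): promising that the source
// pointer is suitably aligned lets the compiler replace the memcpy() with
// a single aligned load even on strict-align targets.
/*
static uint32_t demo_aligned_load(const uint8_t *buf)
{
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num; // may compile to one 32-bit load
}
*/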


static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
aligned_read16be(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16be(num);
}


static inline uint16_t
aligned_read16le(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16le(num);
}


static inline uint32_t
aligned_read32be(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32be(num);
}


static inline uint32_t
aligned_read32le(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32le(num);
}


static inline uint64_t
aligned_read64be(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64be(num);
}


static inline uint64_t
aligned_read64le(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64le(num);
}


// These need to be macros like in the unaligned case.
#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))


////////////////////
// Bit operations //
////////////////////

static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	return (uint32_t)__builtin_clz(n) ^ 31U;

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i;

#else
	uint32_t i = 31;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & 0x80000000) == 0)
		--i;

	return i;
#endif
}


static inline uint32_t
clz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n) ^ 31U;

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	return (uint32_t)__builtin_clz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
			"xorl $31, %0"
			: "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i ^ 31U;

#else
	uint32_t i = 0;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & 0x80000000) == 0)
		++i;

	return i;
#endif
}


static inline uint32_t
ctz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX >= UINT32_MAX
	return (uint32_t)__builtin_ctz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanForward(&i, n);
	return i;

#else
	uint32_t i = 0;

	if ((n & 0x0000FFFF) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & 0x000000FF) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & 0x0000000F) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & 0x00000003) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & 0x00000001) == 0)
		++i;

	return i;
#endif
}

#define bsf32 ctz32
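
// Worked examples (added for illustration). Note that these functions
// require n != 0: x86 BSR/BSF leave the destination undefined for a zero
// input, and __builtin_clz()/__builtin_ctz() are likewise undefined for zero.
/*
bsr32(UINT32_C(0x00012345)) == 16; // index of the highest set bit
clz32(UINT32_C(0x00012345)) == 15; // 31 - bsr32(n)
ctz32(UINT32_C(0x12340000)) == 18; // index of the lowest set bit
bsf32(UINT32_C(0x12340000)) == 18; // alias of ctz32
*/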

#endif