10#ifndef BROTLI_ENC_HASH_H_
11#define BROTLI_ENC_HASH_H_
16#include <brotli/types.h>
29#if defined(__cplusplus) || defined(c_plusplus)
46 size_t dict_num_lookups;
47 size_t dict_num_matches;
61static const uint32_t kCutoffTransformsCount = 10;
64static const uint64_t kCutoffTransforms =
80static const uint32_t kHashMul32 = 0x1E35A7BD;
81static const uint64_t kHashMul64 =
85 uint32_t
h = BROTLI_UNALIGNED_LOAD32LE(
data) * kHashMul32;
88 return h >> (32 - 14);
93 if (num_distances > 4) {
94 int last_distance = distance_cache[0];
95 distance_cache[4] = last_distance - 1;
96 distance_cache[5] = last_distance + 1;
97 distance_cache[6] = last_distance - 2;
98 distance_cache[7] = last_distance + 2;
99 distance_cache[8] = last_distance - 3;
100 distance_cache[9] = last_distance + 3;
101 if (num_distances > 10) {
102 int next_last_distance = distance_cache[1];
103 distance_cache[10] = next_last_distance - 1;
104 distance_cache[11] = next_last_distance + 1;
105 distance_cache[12] = next_last_distance - 2;
106 distance_cache[13] = next_last_distance + 2;
107 distance_cache[14] = next_last_distance - 3;
108 distance_cache[15] = next_last_distance + 3;
113#define BROTLI_LITERAL_BYTE_SCORE 135
114#define BROTLI_DISTANCE_BIT_PENALTY 30
116#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
135 size_t copy_length,
size_t backward_reference_offset) {
141 size_t copy_length) {
147 size_t distance_short_code) {
148 return (
score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
153 const uint8_t*
data,
size_t max_length,
size_t max_backward,
160 if (
len > max_length) {
166 if (matchlen +
dictionary->cutoffTransformsCount <=
len || matchlen == 0) {
170 size_t cut =
len - matchlen;
171 size_t transform_id = (cut << 2) +
172 (
size_t)((
dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
173 backward = max_backward + 1 + word_idx +
174 (transform_id <<
dictionary->words->size_bits_by_length[
len]);
176 if (backward > max_distance) {
179 score = BackwardReferenceScore(matchlen, backward);
180 if (score < out->score) {
184 out->len_code_delta = (
int)
len - (
int)matchlen;
185 out->distance = backward;
193 size_t max_backward,
size_t max_distance,
200 key = Hash14(
data) << 1;
201 for (
i = 0;
i < (shallow ? 1u : 2u); ++
i, ++key) {
203 if (
dictionary->hash_table_lengths[key] != 0) {
204 BROTLI_BOOL item_matches = TestStaticDictionaryItem(
207 max_length, max_backward, max_distance, out);
221 size_t dist,
size_t len) {
227 size_t dist,
size_t len,
size_t len_code) {
230 (uint32_t)((
len << 5) | (
len == len_code ? 0 : len_code));
239 return code ?
code : BackwardMatchLength(self);
242#define EXPAND_CAT(a, b) CAT(a, b)
243#define CAT(a, b) a ## b
244#define FN(X) EXPAND_CAT(X, HASHER())
247#define BUCKET_BITS 17
248#define MAX_TREE_SEARCH_DEPTH 64
249#define MAX_TREE_COMP_LENGTH 128
251#undef MAX_TREE_SEARCH_DEPTH
252#undef MAX_TREE_COMP_LENGTH
256#define MAX_NUM_MATCHES_H10 128
263#define BUCKET_BITS 16
264#define BUCKET_SWEEP_BITS 0
266#define USE_DICTIONARY 1
268#undef BUCKET_SWEEP_BITS
273#define BUCKET_SWEEP_BITS 1
274#define USE_DICTIONARY 0
277#undef BUCKET_SWEEP_BITS
282#define BUCKET_BITS 17
283#define BUCKET_SWEEP_BITS 2
284#define USE_DICTIONARY 1
288#undef BUCKET_SWEEP_BITS
300#define BUCKET_BITS 15
302#define NUM_LAST_DISTANCES_TO_CHECK 4
308#undef NUM_LAST_DISTANCES_TO_CHECK
310#define NUM_LAST_DISTANCES_TO_CHECK 10
314#undef NUM_LAST_DISTANCES_TO_CHECK
318#define NUM_LAST_DISTANCES_TO_CHECK 16
324#undef NUM_LAST_DISTANCES_TO_CHECK
331#define BUCKET_BITS 20
332#define BUCKET_SWEEP_BITS 2
334#define USE_DICTIONARY 0
338#undef BUCKET_SWEEP_BITS
344#define HASHER() HROLLING_FAST
347#define NUMBUCKETS 16777216
348#define MASK ((NUMBUCKETS * 64) - 1)
354#define HASHER() HROLLING
365#define HASHER_B HROLLING_FAST
373#define HASHER_B HROLLING_FAST
381#define HASHER_B HROLLING
391#define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
392#define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
393#define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
394#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
428 BROTLI_BOOL one_shot,
const size_t input_size,
size_t* alloc_size) {
432 HashMemAllocInBytesH ## N(params, one_shot, input_size, alloc_size); \
446 size_t alloc_size[4] = {0};
448 ChooseHasher(params, ¶ms->
hasher);
452 HasherSize(params, one_shot, input_size, alloc_size);
453 for (
i = 0;
i < 4; ++
i) {
454 if (alloc_size[
i] == 0)
continue;
459#define INITIALIZE_(N) \
461 InitializeH ## N(&hasher->common, \
462 &hasher->privat._H ## N, params); \
478 &hasher->privat._H ## N, \
479 one_shot, input_size, data); \
493 HasherSetup(m, hasher, params,
data, position, input_size, is_last);
498 StitchToPreviousBlockH ## N( \
499 &hasher->privat._H ## N, \
500 input_size, position, data, mask); \
512 const size_t ring_buffer_mask,
const int*
BROTLI_RESTRICT distance_cache,
513 const size_t cur_ix,
const size_t max_length,
const size_t distance_offset,
516 const size_t boundary = distance_offset - source_size;
517 const uint32_t hash_bits = self->
hash_bits;
519 const uint32_t slot_bits = self->
slot_bits;
521 const uint32_t hash_shift = 64u - bucket_bits;
522 const uint32_t slot_mask = (~((uint32_t)0
U)) >> (32 - slot_bits);
523 const uint64_t hash_mask = (~((uint64_t)0
U)) >> (64 - hash_bits);
525 const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
526 const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
527 const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
530 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
532 size_t best_len =
out->len;
535 (BROTLI_UNALIGNED_LOAD64LE(&
data[cur_ix_masked]) & hash_mask) *
536 kPreparedDictionaryHashMul64Long;
537 const uint32_t key = (uint32_t)(
h >> hash_shift);
538 const uint32_t slot = key & slot_mask;
539 const uint32_t
head = heads[key];
541 uint32_t item = (
head == 0xFFFF) ? 1 : 0;
543 const void* tail = (
void*)&items[self->
num_items];
544 if (self->
magic == kPreparedDictionaryMagic) {
545 source = (
const uint8_t*)tail;
548 source = (
const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((
const uint8_t**)tail);
551 for (
i = 0;
i < 4; ++
i) {
552 const size_t distance = (size_t)distance_cache[
i];
556 if (distance <= boundary || distance > distance_offset)
continue;
557 offset = distance_offset - distance;
558 limit = source_size - offset;
560 len = FindMatchLengthWithLimit(&
source[offset], &
data[cur_ix_masked],
563 score_t score = BackwardReferenceScoreUsingLastDistance(
len);
564 if (best_score < score) {
565 if (
i != 0) score -= BackwardReferencePenaltyUsingLastDistance(
i);
566 if (best_score < score) {
568 if (
len > best_len) best_len =
len;
570 out->len_code_delta = 0;
571 out->distance = distance;
572 out->score = best_score;
583 offset = item & 0x7FFFFFFF;
585 distance = distance_offset - offset;
586 limit = source_size - offset;
588 if (distance > max_distance)
continue;
589 if (cur_ix_masked + best_len > ring_buffer_mask ||
591 data[cur_ix_masked + best_len] !=
source[offset + best_len]) {
595 const size_t len = FindMatchLengthWithLimit(&
source[offset],
596 &
data[cur_ix_masked],
599 score_t score = BackwardReferenceScore(
len, distance);
600 if (best_score < score) {
604 out->len_code_delta = 0;
605 out->distance = distance;
606 out->score = best_score;
617 const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t min_length,
618 const size_t max_length,
const size_t distance_offset,
619 const size_t max_distance,
BackwardMatch* matches,
size_t match_limit) {
621 const uint32_t hash_bits = self->
hash_bits;
623 const uint32_t slot_bits = self->
slot_bits;
625 const uint32_t hash_shift = 64u - bucket_bits;
626 const uint32_t slot_mask = (~((uint32_t)0
U)) >> (32 - slot_bits);
627 const uint64_t hash_mask = (~((uint64_t)0
U)) >> (64 - hash_bits);
629 const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
630 const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
631 const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
634 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
635 size_t best_len = min_length;
637 (BROTLI_UNALIGNED_LOAD64LE(&
data[cur_ix_masked]) & hash_mask) *
638 kPreparedDictionaryHashMul64Long;
639 const uint32_t key = (uint32_t)(
h >> hash_shift);
640 const uint32_t slot = key & slot_mask;
641 const uint32_t
head = heads[key];
643 uint32_t item = (
head == 0xFFFF) ? 1 : 0;
646 const void* tail = (
void*)&items[self->
num_items];
647 if (self->
magic == kPreparedDictionaryMagic) {
648 source = (
const uint8_t*)tail;
651 source = (
const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((
const uint8_t**)tail);
661 offset = item & 0x7FFFFFFF;
663 distance = distance_offset - offset;
664 limit = source_size - offset;
666 if (distance > max_distance)
continue;
667 if (cur_ix_masked + best_len > ring_buffer_mask ||
669 data[cur_ix_masked + best_len] !=
source[offset + best_len]) {
672 len = FindMatchLengthWithLimit(
674 if (
len > best_len) {
676 InitBackwardMatch(matches++, distance,
len);
678 if (found == match_limit)
break;
686 const size_t ring_buffer_mask,
const int*
BROTLI_RESTRICT distance_cache,
687 const size_t cur_ix,
const size_t max_length,
688 const size_t max_ring_buffer_distance,
const size_t max_distance,
690 size_t base_offset = max_ring_buffer_distance + 1 + addon->
total_size - 1;
694 FindCompoundDictionaryMatch(
696 distance_cache, cur_ix, max_length,
701static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
703 const size_t ring_buffer_mask,
const size_t cur_ix,
size_t min_length,
704 const size_t max_length,
const size_t max_ring_buffer_distance,
706 size_t match_limit) {
707 size_t base_offset = max_ring_buffer_distance + 1 + addon->
total_size - 1;
709 size_t total_found = 0;
712 total_found += FindAllCompoundDictionaryMatches(
714 cur_ix, min_length, max_length, base_offset - addon->
chunk_offsets[
d],
715 max_distance, matches + total_found, match_limit - total_found);
716 if (total_found == match_limit)
break;
717 if (total_found > 0) {
718 min_length = BackwardMatchLength(&matches[total_found - 1]);
724#if defined(__cplusplus) || defined(c_plusplus)
#define score_t
Definition hash.h:59
struct BackwardMatch BackwardMatch
#define BROTLI_SCORE_BASE
Definition hash.h:101
#define FOR_ALL_HASHERS(H)
Definition hash.h:379
#define BROTLI_LITERAL_BYTE_SCORE
Definition hash.h:98
#define score_t
Definition hash.h:43
#define BROTLI_DISTANCE_BIT_PENALTY
Definition hash.h:99
#define MEMBER_(N)
Definition hash.h:385
struct HasherSearchResult HasherSearchResult
#define BROTLI_ALLOC(M, T, N)
Definition memory.h:44
#define BROTLI_FREE(M, P)
Definition memory.h:48
#define BROTLI_IS_OOM(M)
Definition memory.h:54
#define BROTLI_IS_NULL(A)
Definition memory.h:68
const char * source
Definition lz4.h:808
Set found
Definition combine.py:42
str head
Definition test-lz4-abi.py:25
uint32_t length_and_code
Definition hash.h:202
uint32_t distance
Definition hash.h:201
Definition encoder_dict.h:20
BrotliHasherParams hasher
Definition params.h:41
int type
Definition params.h:16
Definition compound_dictionary.h:57
size_t num_chunks
Definition compound_dictionary.h:59
size_t chunk_offsets[SHARED_BROTLI_MAX_COMPOUND_DICTS+1]
Definition compound_dictionary.h:64
const PreparedDictionary * chunks[SHARED_BROTLI_MAX_COMPOUND_DICTS+1]
Definition compound_dictionary.h:62
size_t total_size
Definition compound_dictionary.h:60
BrotliHasherParams params
Definition hash.h:37
size_t dict_num_matches
Definition hash.h:35
void * extra
Definition hash.h:32
BROTLI_BOOL is_prepared_
Definition hash.h:40
BROTLI_BOOL is_setup_
Definition hash.h:44
size_t dict_num_lookups
Definition hash.h:34
HasherCommon common
Definition hash.h:382
score_t score
Definition hash.h:54
size_t distance
Definition hash.h:53
size_t len
Definition hash.h:52
int len_code_delta
Definition hash.h:55
Definition compound_dictionary.h:33
uint32_t num_items
Definition compound_dictionary.h:35
uint32_t bucket_bits
Definition compound_dictionary.h:38
uint32_t magic
Definition compound_dictionary.h:34
uint32_t slot_bits
Definition compound_dictionary.h:39
uint32_t source_size
Definition compound_dictionary.h:36
uint32_t hash_bits
Definition compound_dictionary.h:37
Definition poolTests.c:28
Definition zstd_decompress.c:306
const char * data
Definition invalidDictionaries.c:32
#define BROTLI_TRUE
Definition types.h:51
#define BROTLI_MAKE_UINT64_T(high, low)
Definition types.h:57
#define BROTLI_FALSE
Definition types.h:53
#define BROTLI_BOOL
Definition types.h:49
#define d(i)
Definition sha256.c:44
#define h(i)
Definition sha256.c:48
const lzma_allocator const uint8_t size_t uint8_t * out
Definition block.h:528
lzma_index ** i
Definition index.h:629
#define NULL
Definition getopt1.c:37
static uint32_t const uint8_t uint32_t len
Definition memcmplen.h:44
static uint32_t const uint8_t uint32_t uint32_t limit
Definition memcmplen.h:45