Parolin 0.7.9 6796
Console (soon DLLs) to do a tar like job
Loading...
Searching...
No Matches
hash_forgetful_chain_inc.h
Go to the documentation of this file.
1/* NOLINT(build/header_guard) */
2/* Copyright 2016 Google Inc. All Rights Reserved.
3
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*/
7
8/* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
9 NUM_LAST_DISTANCES_TO_CHECK */
10
11/* A (forgetful) hash table to the data seen by the compressor, to
12 help create backward references to previous data.
13
14 Hashes are stored in chains which are bucketed to groups. Group of chains
15 share a storage "bank". When more than "bank size" chain nodes are added,
16 oldest nodes are replaced; this way several chains may share a tail. */
17
18#define HashForgetfulChain HASHER()
19
20#define BANK_SIZE (1 << BANK_BITS)
21
22/* Number of hash buckets. */
23#define BUCKET_SIZE (1 << BUCKET_BITS)
24
25#define CAPPED_CHAINS 0
26
27static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
28static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
29
30/* HashBytes is the function that chooses the bucket to place the address in.*/
31static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
32 const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
33 /* The higher bits contain more mixture from the multiplication,
34 so we take our results from there. */
35 return h >> (32 - BUCKET_BITS);
36}
37
38typedef struct FN(Slot) {
39 uint16_t delta;
40 uint16_t next;
41} FN(Slot);
42
43typedef struct FN(Bank) {
44 FN(Slot) slots[BANK_SIZE];
45} FN(Bank);
46
47typedef struct HashForgetfulChain {
48 uint16_t free_slot_idx[NUM_BANKS]; /* Up to 1KiB. Move to dynamic? */
49 size_t max_hops;
50
51 /* Shortcuts. */
52 void* extra[2];
53 HasherCommon* common;
54
55 /* --- Dynamic size members --- */
56
57 /* uint32_t addr[BUCKET_SIZE]; */
58
59 /* uint16_t head[BUCKET_SIZE]; */
60
61 /* Truncated hash used for quick rejection of "distance cache" candidates. */
62 /* uint8_t tiny_hash[65536];*/
63
64 /* FN(Bank) banks[NUM_BANKS]; */
66
67static uint32_t* FN(Addr)(void* extra) {
68 return (uint32_t*)extra;
69}
70
71static uint16_t* FN(Head)(void* extra) {
72 return (uint16_t*)(&FN(Addr)(extra)[BUCKET_SIZE]);
73}
74
75static uint8_t* FN(TinyHash)(void* extra) {
76 return (uint8_t*)(&FN(Head)(extra)[BUCKET_SIZE]);
77}
78
79static FN(Bank)* FN(Banks)(void* extra) {
80 return (FN(Bank)*)(extra);
81}
82
83static void FN(Initialize)(
85 const BrotliEncoderParams* params) {
86 self->common = common;
87 self->extra[0] = common->extra[0];
88 self->extra[1] = common->extra[1];
89
90 self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
91}
92
93static void FN(Prepare)(
95 size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
96 uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
97 uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
98 uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
99 /* Partial preparation is 100 times slower (per socket). */
100 size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
101 if (one_shot && input_size <= partial_prepare_threshold) {
102 size_t i;
103 for (i = 0; i < input_size; ++i) {
104 size_t bucket = FN(HashBytes)(&data[i]);
105 /* See InitEmpty comment. */
106 addr[bucket] = 0xCCCCCCCC;
107 head[bucket] = 0xCCCC;
108 }
109 } else {
110 /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
111 processed by hasher never reaches 3GB + 64M; this makes all new chains
112 to be terminated after the first node. */
113 memset(addr, 0xCC, sizeof(uint32_t) * BUCKET_SIZE);
114 memset(head, 0, sizeof(uint16_t) * BUCKET_SIZE);
115 }
116 memset(tiny_hash, 0, sizeof(uint8_t) * 65536);
117 memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
118}
119
120static BROTLI_INLINE void FN(HashMemAllocInBytes)(
121 const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
122 size_t input_size, size_t* alloc_size) {
123 BROTLI_UNUSED(params);
124 BROTLI_UNUSED(one_shot);
125 BROTLI_UNUSED(input_size);
126 alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE +
127 sizeof(uint16_t) * BUCKET_SIZE + sizeof(uint8_t) * 65536;
128 alloc_size[1] = sizeof(FN(Bank)) * NUM_BANKS;
129}
130
131/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
132 node to corresponding chain; also update tiny_hash for current position. */
133static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
134 const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
135 uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
136 uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
137 uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra[0]);
138 FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
139 const size_t key = FN(HashBytes)(&data[ix & mask]);
140 const size_t bank = key & (NUM_BANKS - 1);
141 const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
142 size_t delta = ix - addr[key];
143 tiny_hash[(uint16_t)ix] = (uint8_t)key;
144 if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
145 banks[bank].slots[idx].delta = (uint16_t)delta;
146 banks[bank].slots[idx].next = head[key];
147 addr[key] = (uint32_t)ix;
148 head[key] = (uint16_t)idx;
149}
150
151static BROTLI_INLINE void FN(StoreRange)(
153 const uint8_t* BROTLI_RESTRICT data, const size_t mask,
154 const size_t ix_start, const size_t ix_end) {
155 size_t i;
156 for (i = ix_start; i < ix_end; ++i) {
157 FN(Store)(self, data, mask, i);
158 }
159}
160
161static BROTLI_INLINE void FN(StitchToPreviousBlock)(
163 size_t num_bytes, size_t position, const uint8_t* ringbuffer,
164 size_t ring_buffer_mask) {
165 if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
166 /* Prepare the hashes for three last bytes of the last write.
167 These could not be calculated before, since they require knowledge
168 of both the previous and the current block. */
169 FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
170 FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
171 FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
172 }
173}
174
175static BROTLI_INLINE void FN(PrepareDistanceCache)(
177 int* BROTLI_RESTRICT distance_cache) {
178 BROTLI_UNUSED(self);
179 PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
180}
181
182/* Find a longest backward match of &data[cur_ix] up to the length of
183 max_length and stores the position cur_ix in the hash table.
184
185 REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
186 values; if this method is invoked repeatedly with the same distance
187 cache values, it is enough to invoke FN(PrepareDistanceCache) once.
188
189 Does not look for matches longer than max_length.
190 Does not look for matches further away than max_backward.
191 Writes the best match into |out|.
192 |out|->score is updated only if a better match is found. */
193static BROTLI_INLINE void FN(FindLongestMatch)(
196 const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
197 const int* BROTLI_RESTRICT distance_cache,
198 const size_t cur_ix, const size_t max_length, const size_t max_backward,
199 const size_t dictionary_distance, const size_t max_distance,
201 uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra[0]);
202 uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra[0]);
203 uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra[0]);
204 FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra[1]);
205 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
206 /* Don't accept a short copy from far away. */
207 score_t min_score = out->score;
208 score_t best_score = out->score;
209 size_t best_len = out->len;
210 size_t i;
211 const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
212 const uint8_t tiny_hash = (uint8_t)(key);
213 out->len = 0;
214 out->len_code_delta = 0;
215 /* Try last distance first. */
216 for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
217 const size_t backward = (size_t)distance_cache[i];
218 size_t prev_ix = (cur_ix - backward);
219 /* For distance code 0 we want to consider 2-byte matches. */
220 if (i > 0 && tiny_hashes[(uint16_t)prev_ix] != tiny_hash) continue;
221 if (prev_ix >= cur_ix || backward > max_backward) {
222 continue;
223 }
224 prev_ix &= ring_buffer_mask;
225 {
226 const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
227 &data[cur_ix_masked],
228 max_length);
229 if (len >= 2) {
230 score_t score = BackwardReferenceScoreUsingLastDistance(len);
231 if (best_score < score) {
232 if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
233 if (best_score < score) {
234 best_score = score;
235 best_len = len;
236 out->len = best_len;
237 out->distance = backward;
238 out->score = best_score;
239 }
240 }
241 }
242 }
243 }
244 {
245 const size_t bank = key & (NUM_BANKS - 1);
246 size_t backward = 0;
247 size_t hops = self->max_hops;
248 size_t delta = cur_ix - addr[key];
249 size_t slot = head[key];
250 while (hops--) {
251 size_t prev_ix;
252 size_t last = slot;
253 backward += delta;
254 if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
255 prev_ix = (cur_ix - backward) & ring_buffer_mask;
256 slot = banks[bank].slots[last].next;
257 delta = banks[bank].slots[last].delta;
258 if (cur_ix_masked + best_len > ring_buffer_mask ||
259 prev_ix + best_len > ring_buffer_mask ||
260 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
261 continue;
262 }
263 {
264 const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
265 &data[cur_ix_masked],
266 max_length);
267 if (len >= 4) {
268 /* Comparing for >= 3 does not change the semantics, but just saves
269 for a few unnecessary binary logarithms in backward reference
270 score, since we are not interested in such short matches. */
271 score_t score = BackwardReferenceScore(len, backward);
272 if (best_score < score) {
273 best_score = score;
274 best_len = len;
275 out->len = best_len;
276 out->distance = backward;
277 out->score = best_score;
278 }
279 }
280 }
281 }
282 FN(Store)(self, data, ring_buffer_mask, cur_ix);
283 }
284 if (out->score == min_score) {
285 SearchInStaticDictionary(dictionary,
286 self->common, &data[cur_ix_masked], max_length, dictionary_distance,
287 max_distance, out, BROTLI_FALSE);
288 }
289}
290
291#undef BANK_SIZE
292#undef BUCKET_SIZE
293#undef CAPPED_CHAINS
294
295#undef HashForgetfulChain
static const void * data
Definition XzCrc64.c:50
#define FN(X)
Definition backward_references.c:51
#define NUM_LAST_DISTANCES_TO_CHECK
Definition hash.h:287
#define score_t
Definition hash.h:43
#define BUCKET_BITS
Definition hash.h:232
#define NUM_BANKS
Definition hash.h:288
#define BUCKET_SIZE
Definition hash_forgetful_chain_inc.h:23
#define CAPPED_CHAINS
Definition hash_forgetful_chain_inc.h:25
#define HashForgetfulChain
Definition hash_forgetful_chain_inc.h:18
#define BANK_SIZE
Definition hash_forgetful_chain_inc.h:20
str head
Definition test-lz4-abi.py:25
Definition encoder_dict.h:20
Definition params.h:32
Definition hash_forgetful_chain_inc.h:47
Definition hash.h:30
Definition hash.h:51
Definition poolTests.c:28
Definition zstd_decompress.c:306
#define BROTLI_RESTRICT
Definition platform.h:105
#define BROTLI_UNUSED(X)
Definition platform.h:511
#define BROTLI_INLINE
Definition platform.h:136
#define BROTLI_FALSE
Definition types.h:53
#define BROTLI_BOOL
Definition types.h:49
#define h(i)
Definition sha256.c:48
const lzma_allocator const uint8_t size_t uint8_t * out
Definition block.h:528
lzma_index ** i
Definition index.h:629
static uint32_t const uint8_t uint32_t len
Definition memcmplen.h:44
struct dictionary_s dictionary