Parolin 0.7.9 6796
Console (soon DLLs) to do a tar like job
Loading...
Searching...
No Matches
hash_to_binary_tree_inc.h
Go to the documentation of this file.
1/* NOLINT(build/header_guard) */
2/* Copyright 2016 Google Inc. All Rights Reserved.
3
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*/
7
8/* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH,
9 MAX_TREE_SEARCH_DEPTH */
10
11/* A (forgetful) hash table where each hash bucket contains a binary tree of
12 sequences whose first 4 bytes share the same hash code.
13 Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting
14 position in the input data. The binary tree is sorted by the lexicographic
15 order of the sequences, and it is also a max-heap with respect to the
16 starting positions. */
17
18#define HashToBinaryTree HASHER()
19
20#define BUCKET_SIZE (1 << BUCKET_BITS)
21
22static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
23static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
25}
26
27static uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
28 uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
29 /* The higher bits contain more mixture from the multiplication,
30 so we take our results from there. */
31 return h >> (32 - BUCKET_BITS);
32}
33
34typedef struct HashToBinaryTree {
35 /* The window size minus 1 */
36 size_t window_mask_;
37
38 /* Hash table that maps the 4-byte hashes of the sequence to the last
39 position where this hash was found, which is the root of the binary
40 tree of sequences that share this hash bucket. */
41 uint32_t* buckets_; /* uint32_t[BUCKET_SIZE]; */
42
43 /* A position used to mark a non-existent sequence, i.e. a tree is empty if
44 its root is at invalid_pos_ and a node is a leaf if both its children
45 are at invalid_pos_. */
46 uint32_t invalid_pos_;
47
48 /* --- Dynamic size members --- */
49
50 /* The union of the binary trees of each hash bucket. The root of the tree
51 corresponding to a hash is a sequence starting at buckets_[hash] and
52 the left and right children of a sequence starting at pos are
53 forest_[2 * pos] and forest_[2 * pos + 1]. */
54 uint32_t* forest_; /* uint32_t[2 * num_nodes] */
56
57static void FN(Initialize)(
59 const BrotliEncoderParams* params) {
60 self->buckets_ = (uint32_t*)common->extra[0];
61 self->forest_ = (uint32_t*)common->extra[1];
62
63 self->window_mask_ = (1u << params->lgwin) - 1u;
64 self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);
65}
66
67static void FN(Prepare)
69 size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
70 uint32_t invalid_pos = self->invalid_pos_;
71 uint32_t i;
72 uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
74 BROTLI_UNUSED(one_shot);
75 BROTLI_UNUSED(input_size);
76 for (i = 0; i < BUCKET_SIZE; i++) {
77 buckets[i] = invalid_pos;
78 }
79}
80
81static BROTLI_INLINE void FN(HashMemAllocInBytes)(
82 const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
83 size_t input_size, size_t* alloc_size) {
84 size_t num_nodes = (size_t)1 << params->lgwin;
85 if (one_shot && input_size < num_nodes) {
86 num_nodes = input_size;
87 }
88 alloc_size[0] = sizeof(uint32_t) * BUCKET_SIZE;
89 alloc_size[1] = 2 * sizeof(uint32_t) * num_nodes;
90}
91
92static BROTLI_INLINE size_t FN(LeftChildIndex)(
94 const size_t pos) {
95 return 2 * (pos & self->window_mask_);
96}
97
98static BROTLI_INLINE size_t FN(RightChildIndex)(
100 const size_t pos) {
101 return 2 * (pos & self->window_mask_) + 1;
102}
103
104/* Stores the hash of the next 4 bytes and in a single tree-traversal, the
105 hash bucket's binary tree is searched for matches and is re-rooted at the
106 current position.
107
108 If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the
109 current position is searched for matches, but the state of the hash table
110 is not changed, since we can not know the final sorting order of the
111 current (incomplete) sequence.
112
113 This function must be called with increasing cur_ix positions. */
114static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
116 const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
117 const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
119 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
120 const size_t max_comp_len =
121 BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH);
122 const BROTLI_BOOL should_reroot_tree =
124 const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
125 uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
126 uint32_t* BROTLI_RESTRICT forest = self->forest_;
127 size_t prev_ix = buckets[key];
128 /* The forest index of the rightmost node of the left subtree of the new
129 root, updated as we traverse and re-root the tree of the hash bucket. */
130 size_t node_left = FN(LeftChildIndex)(self, cur_ix);
131 /* The forest index of the leftmost node of the right subtree of the new
132 root, updated as we traverse and re-root the tree of the hash bucket. */
133 size_t node_right = FN(RightChildIndex)(self, cur_ix);
134 /* The match length of the rightmost node of the left subtree of the new
135 root, updated as we traverse and re-root the tree of the hash bucket. */
136 size_t best_len_left = 0;
137 /* The match length of the leftmost node of the right subtree of the new
138 root, updated as we traverse and re-root the tree of the hash bucket. */
139 size_t best_len_right = 0;
140 size_t depth_remaining;
141 if (should_reroot_tree) {
142 buckets[key] = (uint32_t)cur_ix;
143 }
144 for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
145 const size_t backward = cur_ix - prev_ix;
146 const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
147 if (backward == 0 || backward > max_backward || depth_remaining == 0) {
148 if (should_reroot_tree) {
149 forest[node_left] = self->invalid_pos_;
150 forest[node_right] = self->invalid_pos_;
151 }
152 break;
153 }
154 {
155 const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right);
156 size_t len;
158 len = cur_len +
159 FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len],
160 &data[prev_ix_masked + cur_len],
161 max_length - cur_len);
163 0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len));
164 if (matches && len > *best_len) {
165 *best_len = len;
166 InitBackwardMatch(matches++, backward, len);
167 }
168 if (len >= max_comp_len) {
169 if (should_reroot_tree) {
170 forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
171 forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
172 }
173 break;
174 }
175 if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
176 best_len_left = len;
177 if (should_reroot_tree) {
178 forest[node_left] = (uint32_t)prev_ix;
179 }
180 node_left = FN(RightChildIndex)(self, prev_ix);
181 prev_ix = forest[node_left];
182 } else {
183 best_len_right = len;
184 if (should_reroot_tree) {
185 forest[node_right] = (uint32_t)prev_ix;
186 }
187 node_right = FN(LeftChildIndex)(self, prev_ix);
188 prev_ix = forest[node_right];
189 }
190 }
191 }
192 return matches;
193}
194
195/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
196 length of max_length and stores the position cur_ix in the hash table.
197
198 Sets *num_matches to the number of matches found, and stores the found
199 matches in matches[0] to matches[*num_matches - 1]. The matches will be
200 sorted by strictly increasing length and (non-strictly) increasing
201 distance. */
202static BROTLI_INLINE size_t FN(FindAllMatches)(
205 const uint8_t* BROTLI_RESTRICT data,
206 const size_t ring_buffer_mask, const size_t cur_ix,
207 const size_t max_length, const size_t max_backward,
208 const size_t dictionary_distance, const BrotliEncoderParams* params,
209 BackwardMatch* matches) {
210 BackwardMatch* const orig_matches = matches;
211 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
212 size_t best_len = 1;
213 const size_t short_match_max_backward =
214 params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64;
215 size_t stop = cur_ix - short_match_max_backward;
216 uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
217 size_t i;
218 if (cur_ix < short_match_max_backward) { stop = 0; }
219 for (i = cur_ix - 1; i > stop && best_len <= 2; --i) {
220 size_t prev_ix = i;
221 const size_t backward = cur_ix - prev_ix;
222 if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
223 break;
224 }
225 prev_ix &= ring_buffer_mask;
226 if (data[cur_ix_masked] != data[prev_ix] ||
227 data[cur_ix_masked + 1] != data[prev_ix + 1]) {
228 continue;
229 }
230 {
231 const size_t len =
232 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
233 max_length);
234 if (len > best_len) {
235 best_len = len;
236 InitBackwardMatch(matches++, backward, len);
237 }
238 }
239 }
240 if (best_len < max_length) {
241 matches = FN(StoreAndFindMatches)(self, data, cur_ix,
242 ring_buffer_mask, max_length, max_backward, &best_len, matches);
243 }
245 dict_matches[i] = kInvalidMatch;
246 }
247 {
248 size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
250 &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
251 size_t maxlen = BROTLI_MIN(
252 size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
253 size_t l;
254 for (l = minlen; l <= maxlen; ++l) {
255 uint32_t dict_id = dict_matches[l];
256 if (dict_id < kInvalidMatch) {
257 size_t distance = dictionary_distance + (dict_id >> 5) + 1;
258 if (distance <= params->dist.max_distance) {
259 InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
260 }
261 }
262 }
263 }
264 }
265 return (size_t)(matches - orig_matches);
266}
267
268/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
269 current sequence, without returning any matches.
270 REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
271static BROTLI_INLINE void FN(Store)(HashToBinaryTree* BROTLI_RESTRICT self,
272 const uint8_t* BROTLI_RESTRICT data,
273 const size_t mask, const size_t ix) {
274 /* Maximum distance is window size - 16, see section 9.1. of the spec. */
275 const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
276 FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
277 max_backward, NULL, NULL);
278}
279
280static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* BROTLI_RESTRICT self,
281 const uint8_t* BROTLI_RESTRICT data, const size_t mask,
282 const size_t ix_start, const size_t ix_end) {
283 size_t i = ix_start;
284 size_t j = ix_start;
285 if (ix_start + 63 <= ix_end) {
286 i = ix_end - 63;
287 }
288 if (ix_start + 512 <= i) {
289 for (; j < i; j += 8) {
290 FN(Store)(self, data, mask, j);
291 }
292 }
293 for (; i < ix_end; ++i) {
294 FN(Store)(self, data, mask, i);
295 }
296}
297
298static BROTLI_INLINE void FN(StitchToPreviousBlock)(
300 size_t num_bytes, size_t position, const uint8_t* ringbuffer,
301 size_t ringbuffer_mask) {
302 if (num_bytes >= FN(HashTypeLength)() - 1 &&
303 position >= MAX_TREE_COMP_LENGTH) {
304 /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
305 These could not be calculated before, since they require knowledge
306 of both the previous and the current block. */
307 const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1;
308 const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes);
309 size_t i;
310 for (i = i_start; i < i_end; ++i) {
311 /* Maximum distance is window size - 16, see section 9.1. of the spec.
312 Furthermore, we have to make sure that we don't look further back
313 from the start of the next block than the window size, otherwise we
314 could access already overwritten areas of the ring-buffer. */
315 const size_t max_backward =
316 self->window_mask_ - BROTLI_MAX(size_t,
318 position - i);
319 /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the
320 end of the current block and that we have at least
321 MAX_TREE_COMP_LENGTH tail in the ring-buffer. */
322 FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask,
323 MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL);
324 }
325 }
326}
327
328#undef BUCKET_SIZE
329
330#undef HashToBinaryTree
static const void * data
Definition XzCrc64.c:50
#define BROTLI_WINDOW_GAP
Definition constants.h:102
#define FN(X)
Definition backward_references.c:51
#define MAX_TREE_SEARCH_DEPTH
Definition hash.h:233
#define BUCKET_BITS
Definition hash.h:232
#define MAX_TREE_COMP_LENGTH
Definition hash.h:234
#define BUCKET_SIZE
Definition hash_to_binary_tree_inc.h:20
#define HashToBinaryTree
Definition hash_to_binary_tree_inc.h:18
#define HQ_ZOPFLIFICATION_QUALITY
Definition quality.h:20
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(const BrotliEncoderDictionary *dictionary, const uint8_t *data, size_t min_length, size_t max_length, uint32_t *matches)
Definition static_dict.c:77
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN
Definition static_dict.h:21
Definition hash.h:200
Definition encoder_dict.h:20
Definition params.h:32
Definition hash_to_binary_tree_inc.h:34
size_t window_mask_
Definition hash_to_binary_tree_inc.h:36
uint32_t * forest_
Definition hash_to_binary_tree_inc.h:54
uint32_t * buckets_
Definition hash_to_binary_tree_inc.h:41
uint32_t invalid_pos_
Definition hash_to_binary_tree_inc.h:46
Definition hash.h:30
Definition poolTests.c:28
Definition zstd_decompress.c:306
#define BROTLI_RESTRICT
Definition platform.h:105
#define BROTLI_PREDICT_FALSE(x)
Definition platform.h:85
#define BROTLI_MAX(T, A, B)
#define BROTLI_UNUSED(X)
Definition platform.h:511
#define BROTLI_MIN(T, A, B)
#define BROTLI_DCHECK(x)
Definition platform.h:484
#define BROTLI_INLINE
Definition platform.h:136
#define TO_BROTLI_BOOL(X)
Definition types.h:55
#define BROTLI_BOOL
Definition types.h:49
#define h(i)
Definition sha256.c:48
lzma_index ** i
Definition index.h:629
#define NULL
Definition getopt1.c:37
static uint32_t const uint8_t uint32_t len
Definition memcmplen.h:44
struct dictionary_s dictionary