Parolin 0.7.9 6796
Console (soon DLLs) to do a tar like job
Loading...
Searching...
No Matches
hash_longest_match64_inc.h
Go to the documentation of this file.
1/* NOLINT(build/header_guard) */
2/* Copyright 2010 Google Inc. All Rights Reserved.
3
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*/
7
8/* template parameters: FN */
9
10/* A (forgetful) hash table to the data seen by the compressor, to
11 help create backward references to previous data.
12
13 This is a hash map of fixed size (bucket_size_) to a ring buffer of
14 fixed size (block_size_). The ring buffer contains the last block_size_
15 index positions of the given hash key in the compressed data. */
16
17#define HashLongestMatch HASHER()
18
19static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
20static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
21
22/* HashBytes is the function that chooses the bucket to place the address in. */
23static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
24 uint64_t hash_mul) {
25 const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul;
26 /* The higher bits contain more mixture from the multiplication,
27 so we take our results from there. */
28 return (size_t)(h >> (64 - 15));
29}
30
31typedef struct HashLongestMatch {
32 /* Number of hash buckets. */
33 size_t bucket_size_;
34 /* Only block_size_ newest backward references are kept,
35 and the older are forgotten. */
36 size_t block_size_;
37 /* Hash multiplier tuned to match length. */
38 uint64_t hash_mul_;
39 /* Mask for accessing entries in a block (in a ring-buffer manner). */
40 uint32_t block_mask_;
41
42 int block_bits_;
44
45 /* Shortcuts. */
47
48 /* --- Dynamic size members --- */
49
50 /* Number of entries in a particular bucket. */
51 uint16_t* num_; /* uint16_t[bucket_size]; */
52
53 /* Buckets containing block_size_ of backward references. */
54 uint32_t* buckets_; /* uint32_t[bucket_size * block_size]; */
56
57static void FN(Initialize)(
59 const BrotliEncoderParams* params) {
60 self->common_ = common;
61
62 BROTLI_UNUSED(params);
63 self->hash_mul_ = kHashMul64 << (64 - 5 * 8);
64 BROTLI_DCHECK(common->params.bucket_bits == 15);
65 self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
66 self->block_bits_ = common->params.block_bits;
67 self->block_size_ = (size_t)1 << common->params.block_bits;
68 self->block_mask_ = (uint32_t)(self->block_size_ - 1);
69 self->num_last_distances_to_check_ =
70 common->params.num_last_distances_to_check;
71 self->num_ = (uint16_t*)common->extra[0];
72 self->buckets_ = (uint32_t*)common->extra[1];
73}
74
75static void FN(Prepare)(
77 size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
78 uint16_t* BROTLI_RESTRICT num = self->num_;
79 /* Partial preparation is 100 times slower (per socket). */
80 size_t partial_prepare_threshold = self->bucket_size_ >> 6;
81 if (one_shot && input_size <= partial_prepare_threshold) {
82 size_t i;
83 for (i = 0; i < input_size; ++i) {
84 const size_t key = FN(HashBytes)(&data[i], self->hash_mul_);
85 num[key] = 0;
86 }
87 } else {
88 memset(num, 0, self->bucket_size_ * sizeof(num[0]));
89 }
90}
91
92static BROTLI_INLINE void FN(HashMemAllocInBytes)(
93 const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
94 size_t input_size, size_t* alloc_size) {
95 size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
96 size_t block_size = (size_t)1 << params->hasher.block_bits;
97 BROTLI_UNUSED(one_shot);
98 BROTLI_UNUSED(input_size);
99 alloc_size[0] = sizeof(uint16_t) * bucket_size;
100 alloc_size[1] = sizeof(uint32_t) * bucket_size * block_size;
101}
102
103/* Look at 4 bytes at &data[ix & mask].
104 Compute a hash from these, and store the value of ix at that position. */
105static BROTLI_INLINE void FN(Store)(
107 const size_t mask, const size_t ix) {
108 uint16_t* BROTLI_RESTRICT num = self->num_;
109 uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
110 const size_t key = FN(HashBytes)(&data[ix & mask], self->hash_mul_);
111 const size_t minor_ix = num[key] & self->block_mask_;
112 const size_t offset = minor_ix + (key << self->block_bits_);
113 ++num[key];
114 buckets[offset] = (uint32_t)ix;
115}
116
117static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
118 const uint8_t* BROTLI_RESTRICT data, const size_t mask,
119 const size_t ix_start, const size_t ix_end) {
120 size_t i;
121 for (i = ix_start; i < ix_end; ++i) {
122 FN(Store)(self, data, mask, i);
123 }
124}
125
126static BROTLI_INLINE void FN(StitchToPreviousBlock)(
128 size_t num_bytes, size_t position, const uint8_t* ringbuffer,
129 size_t ringbuffer_mask) {
130 if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
131 /* Prepare the hashes for three last bytes of the last write.
132 These could not be calculated before, since they require knowledge
133 of both the previous and the current block. */
134 FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
135 FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
136 FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
137 }
138}
139
140static BROTLI_INLINE void FN(PrepareDistanceCache)(
142 int* BROTLI_RESTRICT distance_cache) {
143 PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
144}
145
146/* Find a longest backward match of &data[cur_ix] up to the length of
147 max_length and stores the position cur_ix in the hash table.
148
149 REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
150 values; if this method is invoked repeatedly with the same distance
151 cache values, it is enough to invoke FN(PrepareDistanceCache) once.
152
153 Does not look for matches longer than max_length.
154 Does not look for matches further away than max_backward.
155 Writes the best match into |out|.
156 |out|->score is updated only if a better match is found. */
157static BROTLI_INLINE void FN(FindLongestMatch)(
160 const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
161 const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
162 const size_t max_length, const size_t max_backward,
163 const size_t dictionary_distance, const size_t max_distance,
165 uint16_t* BROTLI_RESTRICT num = self->num_;
166 uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
167 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
168 /* Don't accept a short copy from far away. */
169 score_t min_score = out->score;
170 score_t best_score = out->score;
171 size_t best_len = out->len;
172 size_t i;
173 out->len = 0;
174 out->len_code_delta = 0;
175 /* Try last distance first. */
176 for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
177 const size_t backward = (size_t)distance_cache[i];
178 size_t prev_ix = (size_t)(cur_ix - backward);
179 if (prev_ix >= cur_ix) {
180 continue;
181 }
182 if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
183 continue;
184 }
185 prev_ix &= ring_buffer_mask;
186
187 if (cur_ix_masked + best_len > ring_buffer_mask ||
188 prev_ix + best_len > ring_buffer_mask ||
189 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
190 continue;
191 }
192 {
193 const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
194 &data[cur_ix_masked],
195 max_length);
196 if (len >= 3 || (len == 2 && i < 2)) {
197 /* Comparing for >= 2 does not change the semantics, but just saves for
198 a few unnecessary binary logarithms in backward reference score,
199 since we are not interested in such short matches. */
200 score_t score = BackwardReferenceScoreUsingLastDistance(len);
201 if (best_score < score) {
202 if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
203 if (best_score < score) {
204 best_score = score;
205 best_len = len;
206 out->len = best_len;
207 out->distance = backward;
208 out->score = best_score;
209 }
210 }
211 }
212 }
213 }
214 {
215 const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
216 uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
217 const size_t down =
218 (num[key] > self->block_size_) ?
219 (num[key] - self->block_size_) : 0u;
220 const uint32_t first4 = BrotliUnalignedRead32(data + cur_ix_masked);
221 const size_t max_length_m4 = max_length - 4;
222 i = num[key];
223 for (; i > down;) {
224 size_t prev_ix = bucket[--i & self->block_mask_];
225 uint32_t current4;
226 const size_t backward = cur_ix - prev_ix;
227 if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
228 break;
229 }
230 prev_ix &= ring_buffer_mask;
231 if (cur_ix_masked + best_len > ring_buffer_mask ||
232 prev_ix + best_len > ring_buffer_mask ||
233 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
234 continue;
235 }
236 current4 = BrotliUnalignedRead32(data + prev_ix);
237 if (first4 != current4) continue;
238 {
239 const size_t len = FindMatchLengthWithLimit(&data[prev_ix + 4],
240 &data[cur_ix_masked + 4],
241 max_length_m4) + 4;
242 const score_t score = BackwardReferenceScore(len, backward);
243 if (best_score < score) {
244 best_score = score;
245 best_len = len;
246 out->len = best_len;
247 out->distance = backward;
248 out->score = best_score;
249 }
250 }
251 }
252 bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
253 ++num[key];
254 }
255 if (min_score == out->score) {
256 SearchInStaticDictionary(dictionary,
257 self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
258 max_distance, out, BROTLI_FALSE);
259 }
260}
261
262#undef HashLongestMatch
static const void * data
Definition XzCrc64.c:50
#define FN(X)
Definition backward_references.c:51
#define score_t
Definition hash.h:43
#define HashLongestMatch
Definition hash_longest_match64_inc.h:17
Definition encoder_dict.h:20
Definition params.h:32
Definition hash_longest_match64_inc.h:32
int num_last_distances_to_check_
Definition hash_longest_match64_inc.h:46
uint16_t * num_
Definition hash_longest_match64_inc.h:54
uint32_t block_mask_
Definition hash_longest_match64_inc.h:43
size_t block_size_
Definition hash_longest_match64_inc.h:37
uint32_t * buckets_
Definition hash_longest_match64_inc.h:57
size_t bucket_size_
Definition hash_longest_match64_inc.h:34
HasherCommon * common_
Definition hash_longest_match64_inc.h:49
int block_bits_
Definition hash_longest_match64_inc.h:45
uint64_t hash_mul_
Definition hash_longest_match64_inc.h:38
Definition hash.h:30
Definition hash.h:51
Definition poolTests.c:28
Definition zstd_decompress.c:306
#define BROTLI_RESTRICT
Definition platform.h:105
#define BROTLI_PREDICT_FALSE(x)
Definition platform.h:85
#define BROTLI_UNUSED(X)
Definition platform.h:511
#define BROTLI_DCHECK(x)
Definition platform.h:484
#define BROTLI_INLINE
Definition platform.h:136
#define BROTLI_FALSE
Definition types.h:53
#define BROTLI_BOOL
Definition types.h:49
#define h(i)
Definition sha256.c:48
const lzma_allocator const uint8_t size_t uint8_t * out
Definition block.h:528
lzma_index ** i
Definition index.h:629
static uint32_t const uint8_t uint32_t len
Definition memcmplen.h:44
struct dictionary_s dictionary