10#define HistogramType FN(Histogram)
12static void FN(InitialEntropyCodes)(
const DataType*
data,
size_t length,
14 size_t num_histograms,
17 size_t block_length = length / num_histograms;
19 FN(ClearHistograms)(histograms, num_histograms);
20 for (
i = 0;
i < num_histograms; ++
i) {
21 size_t pos = length *
i / num_histograms;
23 pos += MyRand(&
seed) % block_length;
25 if (pos + stride >= length) {
26 pos = length - stride - 1;
28 FN(HistogramAddVector)(&histograms[
i],
data + pos, stride);
32static void FN(RandomSample)(uint32_t*
seed,
38 if (stride >= length) {
41 pos = MyRand(
seed) % (length - stride + 1);
43 FN(HistogramAddVector)(sample,
data + pos, stride);
46static void FN(RefineEntropyCodes)(
const DataType*
data,
size_t length,
48 size_t num_histograms,
52 kIterMulForRefining * length / stride + kMinItersForRefining;
55 iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
56 for (iter = 0; iter < iters; ++iter) {
57 FN(HistogramClear)(tmp);
58 FN(RandomSample)(&
seed,
data, length, stride, tmp);
59 FN(HistogramAddHistogram)(&histograms[iter % num_histograms], tmp);
66static size_t FN(FindBlocks)(
const DataType*
data,
const size_t length,
67 const double block_switch_bitcost,
68 const size_t num_histograms,
72 uint8_t* switch_signal,
74 const size_t alphabet_size =
FN(HistogramDataSize)();
75 const size_t bitmap_len = (num_histograms + 7) >> 3;
76 size_t num_blocks = 1;
83 if (num_histograms <= 1) {
84 for (
i = 0;
i < length; ++
i) {
93 memset(insert_cost, 0,
94 sizeof(insert_cost[0]) * alphabet_size * num_histograms);
95 for (
i = 0;
i < num_histograms; ++
i) {
96 insert_cost[
i] = FastLog2((uint32_t)histograms[
i].total_count_);
98 for (
i = alphabet_size;
i != 0;) {
101 for (j = 0; j < num_histograms; ++j) {
102 insert_cost[
i * num_histograms + j] =
103 insert_cost[j] - BitCost(histograms[j].data_[
i]);
113 memset(cost, 0,
sizeof(cost[0]) * num_histograms);
114 memset(switch_signal, 0,
sizeof(switch_signal[0]) * length * bitmap_len);
115 for (byte_ix = 0; byte_ix < length; ++byte_ix) {
116 size_t ix = byte_ix * bitmap_len;
117 size_t symbol =
data[byte_ix];
118 size_t insert_cost_ix = symbol * num_histograms;
119 double min_cost = 1e99;
120 double block_switch_cost = block_switch_bitcost;
122 for (k = 0; k < num_histograms; ++k) {
124 cost[k] += insert_cost[insert_cost_ix + k];
125 if (cost[k] < min_cost) {
127 block_id[byte_ix] = (uint8_t)k;
131 if (byte_ix < 2000) {
132 block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
134 for (k = 0; k < num_histograms; ++k) {
136 if (cost[k] >= block_switch_cost) {
137 const uint8_t mask = (uint8_t)(1u << (k & 7));
138 cost[k] = block_switch_cost;
140 switch_signal[ix + (k >> 3)] |= mask;
145 byte_ix = length - 1;
147 size_t ix = byte_ix * bitmap_len;
148 uint8_t cur_id = block_id[byte_ix];
149 while (byte_ix > 0) {
150 const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
154 if (switch_signal[ix + (cur_id >> 3)] & mask) {
155 if (cur_id != block_id[byte_ix]) {
156 cur_id = block_id[byte_ix];
160 block_id[byte_ix] = cur_id;
166static size_t FN(RemapBlockIds)(uint8_t* block_ids,
const size_t length,
167 uint16_t* new_id,
const size_t num_histograms) {
168 static const uint16_t kInvalidId = 256;
169 uint16_t next_id = 0;
171 for (
i = 0;
i < num_histograms; ++
i) {
172 new_id[
i] = kInvalidId;
174 for (
i = 0;
i < length; ++
i) {
176 if (new_id[block_ids[
i]] == kInvalidId) {
177 new_id[block_ids[
i]] = next_id++;
180 for (
i = 0;
i < length; ++
i) {
181 block_ids[
i] = (uint8_t)new_id[block_ids[
i]];
188static void FN(BuildBlockHistograms)(
const DataType*
data,
const size_t length,
189 const uint8_t* block_ids,
190 const size_t num_histograms,
193 FN(ClearHistograms)(histograms, num_histograms);
194 for (
i = 0;
i < length; ++
i) {
195 FN(HistogramAdd)(&histograms[block_ids[
i]],
data[
i]);
203 const size_t num_blocks,
206 uint32_t* histogram_symbols =
BROTLI_ALLOC(m, uint32_t, num_blocks);
211 size_t all_histograms_size = 0;
212 size_t all_histograms_capacity = expected_num_clusters;
215 size_t cluster_size_size = 0;
216 size_t cluster_size_capacity = expected_num_clusters;
217 uint32_t* cluster_size =
BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
218 size_t num_clusters = 0;
221 size_t max_num_pairs =
223 size_t pairs_capacity = max_num_pairs + 1;
227 size_t num_final_clusters;
251 size_t block_idx = 0;
252 for (
i = 0;
i < length; ++
i) {
254 ++block_lengths[block_idx];
255 if (
i + 1 == length || block_ids[
i] != block_ids[
i + 1]) {
264 const size_t num_to_combine =
266 size_t num_new_clusters;
268 for (j = 0; j < num_to_combine; ++j) {
270 size_t block_length = block_lengths[
i + j];
271 FN(HistogramClear)(&histograms[j]);
272 for (k = 0; k < block_length; ++k) {
273 FN(HistogramAdd)(&histograms[j],
data[pos++]);
276 new_clusters[j] = (uint32_t)j;
277 symbols[j] = (uint32_t)j;
281 histograms, tmp, sizes, symbols, new_clusters, pairs, num_to_combine,
284 all_histograms_capacity, all_histograms_size + num_new_clusters);
286 cluster_size_capacity, cluster_size_size + num_new_clusters);
288 for (j = 0; j < num_new_clusters; ++j) {
289 all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
290 cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
291 remap[new_clusters[j]] = (uint32_t)j;
293 for (j = 0; j < num_to_combine; ++j) {
294 histogram_symbols[
i + j] = (uint32_t)num_clusters + remap[symbols[j]];
296 num_clusters += num_new_clusters;
304 BROTLI_MIN(
size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
305 if (pairs_capacity < max_num_pairs + 1) {
312 for (
i = 0;
i < num_clusters; ++
i) {
313 clusters[
i] = (uint32_t)
i;
316 all_histograms, tmp, cluster_size, histogram_symbols, clusters, pairs,
325 for (
i = 0;
i < num_clusters; ++
i) new_index[
i] = kInvalidIndex;
328 uint32_t next_index = 0;
329 for (
i = 0;
i < num_blocks; ++
i) {
333 FN(HistogramClear)(tmp);
334 for (j = 0; j < block_lengths[
i]; ++j) {
335 FN(HistogramAdd)(tmp,
data[pos++]);
339 best_out = (
i == 0) ? histogram_symbols[0] : histogram_symbols[
i - 1];
341 tmp, &all_histograms[best_out], tmp + 1);
342 for (j = 0; j < num_final_clusters; ++j) {
344 tmp, &all_histograms[clusters[j]], tmp + 1);
345 if (cur_bits < best_bits) {
346 best_bits = cur_bits;
347 best_out = clusters[j];
350 histogram_symbols[
i] = best_out;
351 if (new_index[best_out] == kInvalidIndex) {
352 new_index[best_out] = next_index++;
360 m, uint8_t,
split->types,
split->types_alloc_size, num_blocks);
362 m, uint32_t,
split->lengths,
split->lengths_alloc_size, num_blocks);
368 uint32_t cur_length = 0;
369 size_t block_idx = 0;
370 uint8_t max_type = 0;
371 for (
i = 0;
i < num_blocks; ++
i) {
372 cur_length += block_lengths[
i];
373 if (
i + 1 == num_blocks ||
374 histogram_symbols[
i] != histogram_symbols[
i + 1]) {
375 const uint8_t
id = (uint8_t)new_index[histogram_symbols[
i]];
377 split->lengths[block_idx] = cur_length;
383 split->num_blocks = block_idx;
384 split->num_types = (size_t)max_type + 1;
399 const size_t symbols_per_histogram,
400 const size_t max_histograms,
401 const size_t sampling_stride_length,
402 const double block_switch_cost,
405 const size_t data_size =
FN(HistogramDataSize)();
410 size_t num_histograms = length / symbols_per_histogram + 1;
411 if (num_histograms > max_histograms) {
412 num_histograms = max_histograms;
417 split->num_types = 1;
421 if (length < kMinLengthForBlockSplitting) {
427 split->num_types = 1;
429 split->lengths[
split->num_blocks] = (uint32_t)length;
434 tmp = histograms + num_histograms;
437 FN(InitialEntropyCodes)(
data, length,
438 sampling_stride_length,
439 num_histograms, histograms);
440 FN(RefineEntropyCodes)(
data, length,
441 sampling_stride_length,
442 num_histograms, histograms, tmp);
446 size_t num_blocks = 0;
447 const size_t bitmaplen = (num_histograms + 7) >> 3;
448 double* insert_cost =
BROTLI_ALLOC(m,
double, data_size * num_histograms);
450 uint8_t* switch_signal =
BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
451 uint16_t* new_id =
BROTLI_ALLOC(m, uint16_t, num_histograms);
459 for (
i = 0;
i < iters; ++
i) {
460 num_blocks =
FN(FindBlocks)(
data, length,
462 num_histograms, histograms,
463 insert_cost, cost, switch_signal,
465 num_histograms =
FN(RemapBlockIds)(block_ids, length,
466 new_id, num_histograms);
467 FN(BuildBlockHistograms)(
data, length, block_ids,
468 num_histograms, histograms);
475 FN(ClusterBlocks)(m,
data, length, num_blocks, block_ids,
split);
static const void * data
Definition XzCrc64.c:50
#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES
Definition constants.h:23
#define FN(X)
Definition backward_references.c:51
double FN BrotliPopulationCost(const HistogramType *histogram)
Definition bit_cost_inc.h:12
#define HISTOGRAMS_PER_BATCH
Definition block_splitter.c:86
#define DataType
Definition block_splitter.c:90
#define CLUSTERS_PER_BATCH
Definition block_splitter.c:87
#define HistogramType
Definition block_splitter_inc.h:10
BROTLI_INTERNAL size_t FN BrotliHistogramCombine(HistogramType *out, uint32_t *cluster_size, uint32_t *symbols, uint32_t *clusters, HistogramPair *pairs, size_t num_clusters, size_t symbols_size, size_t max_clusters, size_t max_num_pairs) CODE(
Definition cluster_inc.h:70
BROTLI_INTERNAL double FN BrotliHistogramBitCostDistance(const HistogramType *histogram, const HistogramType *candidate) CODE(
Definition cluster_inc.h:157
#define BROTLI_ALLOC(M, T, N)
Definition memory.h:44
#define BROTLI_FREE(M, P)
Definition memory.h:48
#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R)
Definition memory.h:81
#define BROTLI_IS_OOM(M)
Definition memory.h:54
#define BROTLI_IS_NULL(A)
Definition memory.h:68
#define HQ_ZOPFLIFICATION_QUALITY
Definition quality.h:20
Buffer split(Buffer &buffer, ZSTD_outBuffer &outBuffer)
Definition Pzstd.cpp:248
Definition block_splitter.h:22
Definition poolTests.c:28
#define BROTLI_UINT32_MAX
Definition types.h:59
lzma_index ** i
Definition index.h:629
lzma_vli id
Filter ID.
Definition filter_common.c:18
uint32_t u32
Definition zstd_decompress.c:62
uint32_t seed
Definition stream_decompress.c:26