10#define HistogramType FN(Histogram)
12static void FN(InitialEntropyCodes)(
const DataType*
data,
size_t length,
14 size_t num_histograms,
17 size_t block_length = length / num_histograms;
19 FN(ClearHistograms)(histograms, num_histograms);
20 for (
i = 0;
i < num_histograms; ++
i) {
21 size_t pos = length *
i / num_histograms;
23 pos += MyRand(&
seed) % block_length;
25 if (pos + stride >= length) {
26 pos = length - stride - 1;
28 FN(HistogramAddVector)(&histograms[
i],
data + pos, stride);
32static void FN(RandomSample)(uint32_t*
seed,
38 if (stride >= length) {
41 pos = MyRand(
seed) % (length - stride + 1);
43 FN(HistogramAddVector)(sample,
data + pos, stride);
46static void FN(RefineEntropyCodes)(
const DataType*
data,
size_t length,
48 size_t num_histograms,
51 kIterMulForRefining * length / stride + kMinItersForRefining;
54 iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
55 for (iter = 0; iter < iters; ++iter) {
57 FN(HistogramClear)(&sample);
58 FN(RandomSample)(&
seed,
data, length, stride, &sample);
59 FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
66static size_t FN(FindBlocks)(
const DataType*
data,
const size_t length,
67 const double block_switch_bitcost,
68 const size_t num_histograms,
72 uint8_t* switch_signal,
74 const size_t data_size =
FN(HistogramDataSize)();
75 const size_t bitmaplen = (num_histograms + 7) >> 3;
76 size_t num_blocks = 1;
80 if (num_histograms <= 1) {
81 for (
i = 0;
i < length; ++
i) {
86 memset(insert_cost, 0,
sizeof(insert_cost[0]) * data_size * num_histograms);
87 for (
i = 0;
i < num_histograms; ++
i) {
88 insert_cost[
i] = FastLog2((uint32_t)histograms[
i].total_count_);
90 for (
i = data_size;
i != 0;) {
92 for (j = 0; j < num_histograms; ++j) {
93 insert_cost[
i * num_histograms + j] =
94 insert_cost[j] - BitCost(histograms[j].data_[
i]);
97 memset(cost, 0,
sizeof(cost[0]) * num_histograms);
98 memset(switch_signal, 0,
sizeof(switch_signal[0]) * length * bitmaplen);
105 for (
i = 0;
i < length; ++
i) {
106 const size_t byte_ix =
i;
107 size_t ix = byte_ix * bitmaplen;
108 size_t insert_cost_ix =
data[byte_ix] * num_histograms;
109 double min_cost = 1e99;
110 double block_switch_cost = block_switch_bitcost;
112 for (k = 0; k < num_histograms; ++k) {
114 cost[k] += insert_cost[insert_cost_ix + k];
115 if (cost[k] < min_cost) {
117 block_id[byte_ix] = (uint8_t)k;
121 if (byte_ix < 2000) {
122 block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
124 for (k = 0; k < num_histograms; ++k) {
126 if (cost[k] >= block_switch_cost) {
127 const uint8_t mask = (uint8_t)(1u << (k & 7));
128 cost[k] = block_switch_cost;
130 switch_signal[ix + (k >> 3)] |= mask;
135 size_t byte_ix = length - 1;
136 size_t ix = byte_ix * bitmaplen;
137 uint8_t cur_id = block_id[byte_ix];
138 while (byte_ix > 0) {
139 const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
143 if (switch_signal[ix + (cur_id >> 3)] & mask) {
144 if (cur_id != block_id[byte_ix]) {
145 cur_id = block_id[byte_ix];
149 block_id[byte_ix] = cur_id;
155static size_t FN(RemapBlockIds)(uint8_t* block_ids,
const size_t length,
156 uint16_t* new_id,
const size_t num_histograms) {
157 static const uint16_t kInvalidId = 256;
158 uint16_t next_id = 0;
160 for (
i = 0;
i < num_histograms; ++
i) {
161 new_id[
i] = kInvalidId;
163 for (
i = 0;
i < length; ++
i) {
165 if (new_id[block_ids[
i]] == kInvalidId) {
166 new_id[block_ids[
i]] = next_id++;
169 for (
i = 0;
i < length; ++
i) {
170 block_ids[
i] = (uint8_t)new_id[block_ids[
i]];
177static void FN(BuildBlockHistograms)(
const DataType*
data,
const size_t length,
178 const uint8_t* block_ids,
179 const size_t num_histograms,
182 FN(ClearHistograms)(histograms, num_histograms);
183 for (
i = 0;
i < length; ++
i) {
184 FN(HistogramAdd)(&histograms[block_ids[
i]],
data[
i]);
190 const size_t num_blocks,
193 uint32_t* histogram_symbols =
BROTLI_ALLOC(m, uint32_t, num_blocks);
194 uint32_t* block_lengths =
BROTLI_ALLOC(m, uint32_t, num_blocks);
197 size_t all_histograms_size = 0;
198 size_t all_histograms_capacity = expected_num_clusters;
201 size_t cluster_size_size = 0;
202 size_t cluster_size_capacity = expected_num_clusters;
203 uint32_t* cluster_size =
BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
204 size_t num_clusters = 0;
207 size_t max_num_pairs =
209 size_t pairs_capacity = max_num_pairs + 1;
213 size_t num_final_clusters;
229 memset(block_lengths, 0, num_blocks *
sizeof(uint32_t));
232 size_t block_idx = 0;
233 for (
i = 0;
i < length; ++
i) {
235 ++block_lengths[block_idx];
236 if (
i + 1 == length || block_ids[
i] != block_ids[
i + 1]) {
244 const size_t num_to_combine =
246 size_t num_new_clusters;
248 for (j = 0; j < num_to_combine; ++j) {
250 FN(HistogramClear)(&histograms[j]);
251 for (k = 0; k < block_lengths[
i + j]; ++k) {
252 FN(HistogramAdd)(&histograms[j],
data[pos++]);
255 new_clusters[j] = (uint32_t)j;
256 symbols[j] = (uint32_t)j;
260 histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
263 all_histograms_capacity, all_histograms_size + num_new_clusters);
265 cluster_size_capacity, cluster_size_size + num_new_clusters);
267 for (j = 0; j < num_new_clusters; ++j) {
268 all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
269 cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
270 remap[new_clusters[j]] = (uint32_t)j;
272 for (j = 0; j < num_to_combine; ++j) {
273 histogram_symbols[
i + j] = (uint32_t)num_clusters + remap[symbols[j]];
275 num_clusters += num_new_clusters;
282 BROTLI_MIN(
size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
283 if (pairs_capacity < max_num_pairs + 1) {
291 for (
i = 0;
i < num_clusters; ++
i) {
292 clusters[
i] = (uint32_t)
i;
295 all_histograms, cluster_size, histogram_symbols, clusters, pairs,
303 for (
i = 0;
i < num_clusters; ++
i) new_index[
i] = kInvalidIndex;
306 uint32_t next_index = 0;
307 for (
i = 0;
i < num_blocks; ++
i) {
312 FN(HistogramClear)(&histo);
313 for (j = 0; j < block_lengths[
i]; ++j) {
314 FN(HistogramAdd)(&histo,
data[pos++]);
316 best_out = (
i == 0) ? histogram_symbols[0] : histogram_symbols[
i - 1];
319 for (j = 0; j < num_final_clusters; ++j) {
321 &histo, &all_histograms[clusters[j]]);
322 if (cur_bits < best_bits) {
323 best_bits = cur_bits;
324 best_out = clusters[j];
327 histogram_symbols[
i] = best_out;
328 if (new_index[best_out] == kInvalidIndex) {
329 new_index[best_out] = next_index++;
336 m, uint8_t,
split->types,
split->types_alloc_size, num_blocks);
338 m, uint32_t,
split->lengths,
split->lengths_alloc_size, num_blocks);
341 uint32_t cur_length = 0;
342 size_t block_idx = 0;
343 uint8_t max_type = 0;
344 for (
i = 0;
i < num_blocks; ++
i) {
345 cur_length += block_lengths[
i];
346 if (
i + 1 == num_blocks ||
347 histogram_symbols[
i] != histogram_symbols[
i + 1]) {
348 const uint8_t
id = (uint8_t)new_index[histogram_symbols[
i]];
350 split->lengths[block_idx] = cur_length;
356 split->num_blocks = block_idx;
357 split->num_types = (size_t)max_type + 1;
366 const size_t literals_per_histogram,
367 const size_t max_histograms,
368 const size_t sampling_stride_length,
369 const double block_switch_cost,
372 const size_t data_size =
FN(HistogramDataSize)();
373 size_t num_histograms = length / literals_per_histogram + 1;
375 if (num_histograms > max_histograms) {
376 num_histograms = max_histograms;
379 split->num_types = 1;
381 }
else if (length < kMinLengthForBlockSplitting) {
387 split->num_types = 1;
389 split->lengths[
split->num_blocks] = (uint32_t)length;
396 FN(InitialEntropyCodes)(
data, length,
397 sampling_stride_length,
398 num_histograms, histograms);
399 FN(RefineEntropyCodes)(
data, length,
400 sampling_stride_length,
401 num_histograms, histograms);
405 size_t num_blocks = 0;
406 const size_t bitmaplen = (num_histograms + 7) >> 3;
407 double* insert_cost =
BROTLI_ALLOC(m,
double, data_size * num_histograms);
409 uint8_t* switch_signal =
BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
410 uint16_t* new_id =
BROTLI_ALLOC(m, uint16_t, num_histograms);
418 for (
i = 0;
i < iters; ++
i) {
419 num_blocks =
FN(FindBlocks)(
data, length,
421 num_histograms, histograms,
422 insert_cost, cost, switch_signal,
424 num_histograms =
FN(RemapBlockIds)(block_ids, length,
425 new_id, num_histograms);
426 FN(BuildBlockHistograms)(
data, length, block_ids,
427 num_histograms, histograms);
434 FN(ClusterBlocks)(m,
data, length, num_blocks, block_ids,
split);
static const void * data
Definition XzCrc64.c:50
#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES
Definition constants.h:23
#define FN(X)
Definition backward_references.c:51
double FN BrotliPopulationCost(const HistogramType *histogram)
Definition bit_cost_inc.h:12
#define HISTOGRAMS_PER_BATCH
Definition block_splitter.c:86
#define DataType
Definition block_splitter.c:90
#define CLUSTERS_PER_BATCH
Definition block_splitter.c:87
#define HistogramType
Definition block_splitter_inc.h:10
BROTLI_INTERNAL size_t FN BrotliHistogramCombine(HistogramType *out, uint32_t *cluster_size, uint32_t *symbols, uint32_t *clusters, HistogramPair *pairs, size_t num_clusters, size_t symbols_size, size_t max_clusters, size_t max_num_pairs) CODE(
Definition cluster_inc.h:70
BROTLI_INTERNAL double FN BrotliHistogramBitCostDistance(const HistogramType *histogram, const HistogramType *candidate) CODE(
Definition cluster_inc.h:157
#define BROTLI_ALLOC(M, T, N)
Definition memory.h:44
#define BROTLI_FREE(M, P)
Definition memory.h:48
#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R)
Definition memory.h:81
#define BROTLI_IS_OOM(M)
Definition memory.h:54
#define BROTLI_IS_NULL(A)
Definition memory.h:68
#define HQ_ZOPFLIFICATION_QUALITY
Definition quality.h:20
Buffer split(Buffer &buffer, ZSTD_outBuffer &outBuffer)
Definition Pzstd.cpp:248
Definition block_splitter.h:22
Definition poolTests.c:28
#define BROTLI_UINT32_MAX
Definition types.h:59
lzma_index ** i
Definition index.h:629
lzma_vli id
Filter ID.
Definition filter_common.c:18
uint32_t seed
Definition stream_decompress.c:26