![]() |
Parolin 0.7.9 6796
Console (soon DLLs) to do a tar-like job
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "../common/mem.h"
#include "../common/pool.h"
#include "../common/threading.h"
#include "../common/zstd_internal.h"
#include "../common/bits.h"
#include "../zdict.h"
#include "cover.h"
Data Structures | |
| struct | COVER_map_pair_t_s |
| struct | COVER_map_s |
| struct | COVER_ctx_t |
| struct | COVER_tryParameters_data_s |
Macros | |
| #define | ZDICT_STATIC_LINKING_ONLY |
| #define | COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) |
| #define | COVER_DEFAULT_SPLITPOINT 1.0 |
| #define | DISPLAY(...) |
| #define | LOCALDISPLAYLEVEL(displayLevel, l, ...) |
| #define | DISPLAYLEVEL(l, ...) |
| #define | LOCALDISPLAYUPDATE(displayLevel, l, ...) |
| #define | DISPLAYUPDATE(l, ...) |
| #define | MAP_EMPTY_VALUE ((U32)-1) |
Typedefs | |
| typedef struct COVER_map_pair_t_s | COVER_map_pair_t |
| typedef struct COVER_map_s | COVER_map_t |
| typedef struct COVER_tryParameters_data_s | COVER_tryParameters_data_t |
Functions | |
| size_t | COVER_sum (const size_t *samplesSizes, unsigned nbSamples) |
| void | COVER_warnOnSmallCorpus (size_t maxDictSize, size_t nbDmers, int displayLevel) |
| COVER_epoch_info_t | COVER_computeEpochs (U32 maxDictSize, U32 nbDmers, U32 k, U32 passes) |
| ZDICTLIB_API size_t | ZDICT_trainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t parameters) |
| size_t | COVER_checkTotalCompressedSize (const ZDICT_cover_params_t parameters, const size_t *samplesSizes, const BYTE *samples, size_t *offsets, size_t nbTrainSamples, size_t nbSamples, BYTE *const dict, size_t dictBufferCapacity) |
| void | COVER_best_init (COVER_best_t *best) |
| void | COVER_best_wait (COVER_best_t *best) |
| void | COVER_best_destroy (COVER_best_t *best) |
| void | COVER_best_start (COVER_best_t *best) |
| void | COVER_best_finish (COVER_best_t *best, ZDICT_cover_params_t parameters, COVER_dictSelection_t selection) |
| COVER_dictSelection_t | COVER_dictSelectionError (size_t error) |
| unsigned | COVER_dictSelectionIsError (COVER_dictSelection_t selection) |
| void | COVER_dictSelectionFree (COVER_dictSelection_t selection) |
| COVER_dictSelection_t | COVER_selectDict (BYTE *customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE *samplesBuffer, const size_t *samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t *offsets, size_t totalCompressedSize) |
| ZDICTLIB_API size_t | ZDICT_optimizeTrainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t *parameters) |
| #define COVER_DEFAULT_SPLITPOINT 1.0 |
| #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) |
Samples are referenced with 32-bit indexes, so the total samples size is limited to 4 GB on 64-bit builds. For 32-bit builds we choose 1 GB: most 32-bit platforms have 2 GB of user-mode addressable space and we allocate a large contiguous buffer, so 1 GB is already a high limit.
| #define DISPLAY | ( | ... | ) |
| #define DISPLAYLEVEL | ( | l, | |
| ... ) |
| #define DISPLAYUPDATE | ( | l, | |
| ... ) |
| #define LOCALDISPLAYLEVEL | ( | displayLevel, | |
| l, | |||
| ... ) |
| #define LOCALDISPLAYUPDATE | ( | displayLevel, | |
| l, | |||
| ... ) |
| #define MAP_EMPTY_VALUE ((U32)-1) |
| #define ZDICT_STATIC_LINKING_ONLY |
| typedef struct COVER_map_pair_t_s COVER_map_pair_t |
| typedef struct COVER_map_s COVER_map_t |
| typedef struct COVER_tryParameters_data_s COVER_tryParameters_data_t |
Parameters for COVER_tryParameters().
| void COVER_best_destroy | ( | COVER_best_t * | best | ) |
Call COVER_best_wait() and then destroy the COVER_best_t.
| void COVER_best_finish | ( | COVER_best_t * | best, |
| ZDICT_cover_params_t | parameters, | ||
| COVER_dictSelection_t | selection ) |
Called when a thread finishes executing, whether on error or on success. Decrements liveJobs and signals any waiting threads if liveJobs == 0. If this dictionary is the best so far, saves it and its parameters.
| void COVER_best_init | ( | COVER_best_t * | best | ) |
Initialize the COVER_best_t.
| void COVER_best_start | ( | COVER_best_t * | best | ) |
Called when a thread is about to be launched. Increments liveJobs.
| void COVER_best_wait | ( | COVER_best_t * | best | ) |
Wait until liveJobs == 0.
| size_t COVER_checkTotalCompressedSize | ( | const ZDICT_cover_params_t | parameters, |
| const size_t * | samplesSizes, | ||
| const BYTE * | samples, | ||
| size_t * | offsets, | ||
| size_t | nbTrainSamples, | ||
| size_t | nbSamples, | ||
| BYTE *const | dict, | ||
| size_t | dictBufferCapacity ) |
Checks the total compressed size of a dictionary.
| COVER_epoch_info_t COVER_computeEpochs | ( | U32 | maxDictSize, |
| U32 | nbDmers, | ||
| U32 | k, | ||
| U32 | passes ) |
Computes the number of epochs and the size of each epoch. We will make sure that each epoch gets at least 10 * k bytes.
The COVER algorithms divide the data up into epochs of equal size and select one segment from each epoch.
| maxDictSize | The maximum allowed dictionary size. |
| nbDmers | The number of dmers we are training on. |
| k | The parameter k (segment size). |
| passes | The target number of passes over the dmer corpus. More passes means a better dictionary. |
| COVER_dictSelection_t COVER_dictSelectionError | ( | size_t | error | ) |
Error function for COVER_selectDict function. Returns a struct where return.totalCompressedSize is a ZSTD error.
| void COVER_dictSelectionFree | ( | COVER_dictSelection_t | selection | ) |
Always call this after COVER_selectDict() to free the memory used by the newly created dictionary.
| unsigned COVER_dictSelectionIsError | ( | COVER_dictSelection_t | selection | ) |
Error function for COVER_selectDict function. Checks if the return value is an error.
| COVER_dictSelection_t COVER_selectDict | ( | BYTE * | customDictContent, |
| size_t | dictBufferCapacity, | ||
| size_t | dictContentSize, | ||
| const BYTE * | samplesBuffer, | ||
| const size_t * | samplesSizes, | ||
| unsigned | nbFinalizeSamples, | ||
| size_t | nbCheckSamples, | ||
| size_t | nbSamples, | ||
| ZDICT_cover_params_t | params, | ||
| size_t * | offsets, | ||
| size_t | totalCompressedSize ) |
Called to finalize the dictionary and select one based on whether or not the shrink-dict flag was enabled. If enabled, the dictionary used is the smallest dictionary within a specified regression of the compressed size from the largest dictionary.
| size_t COVER_sum | ( | const size_t * | samplesSizes, |
| unsigned | nbSamples ) |
Returns the sum of the sample sizes.
| void COVER_warnOnSmallCorpus | ( | size_t | maxDictSize, |
| size_t | nbDmers, | ||
| int | displayLevel ) |
Warns the user when their corpus is too small.
| ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover | ( | void * | dictBuffer, |
| size_t | dictBufferCapacity, | ||
| const void * | samplesBuffer, | ||
| const size_t * | samplesSizes, | ||
| unsigned | nbSamples, | ||
| ZDICT_cover_params_t * | parameters ) |
| ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover | ( | void * | dictBuffer, |
| size_t | dictBufferCapacity, | ||
| const void * | samplesBuffer, | ||
| const size_t * | samplesSizes, | ||
| unsigned | nbSamples, | ||
| ZDICT_cover_params_t | parameters ) |