![]() |
Parolin 0.7.9 6796
Console (soon DLLs) to do a tar-like job
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "../common/mem.h"
#include "../common/pool.h"
#include "../common/threading.h"
#include "../common/zstd_internal.h"
#include "../zdict.h"
#include "cover.h"
Data Structures | |
struct | COVER_map_pair_t_s |
struct | COVER_map_s |
struct | COVER_ctx_t |
struct | COVER_tryParameters_data_s |
Macros | |
#define | ZDICT_STATIC_LINKING_ONLY |
#define | COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) |
#define | COVER_DEFAULT_SPLITPOINT 1.0 |
#define | DISPLAY(...) |
#define | LOCALDISPLAYLEVEL(displayLevel, l, ...) |
#define | DISPLAYLEVEL(l, ...) |
#define | LOCALDISPLAYUPDATE(displayLevel, l, ...) |
#define | DISPLAYUPDATE(l, ...) |
#define | MAP_EMPTY_VALUE ((U32)-1) |
Typedefs | |
typedef struct COVER_map_pair_t_s | COVER_map_pair_t |
typedef struct COVER_map_s | COVER_map_t |
typedef struct COVER_tryParameters_data_s | COVER_tryParameters_data_t |
Functions | |
size_t | COVER_sum (const size_t *samplesSizes, unsigned nbSamples) |
void | COVER_warnOnSmallCorpus (size_t maxDictSize, size_t nbDmers, int displayLevel) |
COVER_epoch_info_t | COVER_computeEpochs (U32 maxDictSize, U32 nbDmers, U32 k, U32 passes) |
ZDICTLIB_API size_t | ZDICT_trainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t parameters) |
size_t | COVER_checkTotalCompressedSize (const ZDICT_cover_params_t parameters, const size_t *samplesSizes, const BYTE *samples, size_t *offsets, size_t nbTrainSamples, size_t nbSamples, BYTE *const dict, size_t dictBufferCapacity) |
void | COVER_best_init (COVER_best_t *best) |
void | COVER_best_wait (COVER_best_t *best) |
void | COVER_best_destroy (COVER_best_t *best) |
void | COVER_best_start (COVER_best_t *best) |
void | COVER_best_finish (COVER_best_t *best, ZDICT_cover_params_t parameters, COVER_dictSelection_t selection) |
COVER_dictSelection_t | COVER_dictSelectionError (size_t error) |
unsigned | COVER_dictSelectionIsError (COVER_dictSelection_t selection) |
void | COVER_dictSelectionFree (COVER_dictSelection_t selection) |
COVER_dictSelection_t | COVER_selectDict (BYTE *customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE *samplesBuffer, const size_t *samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t *offsets, size_t totalCompressedSize) |
ZDICTLIB_API size_t | ZDICT_optimizeTrainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t *parameters) |
#define COVER_DEFAULT_SPLITPOINT 1.0 |
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) |
Samples are referenced with 32-bit indexes, so the total samples size is limited to 4 GB on 64-bit builds. For 32-bit builds we choose 1 GB: most 32-bit platforms have 2 GB of user-mode addressable space and we allocate a large contiguous buffer, so 1 GB is already a high limit.
#define DISPLAY | ( | ... | ) |
#define DISPLAYLEVEL | ( | l, | |
... ) |
#define DISPLAYUPDATE | ( | l, | |
... ) |
#define LOCALDISPLAYLEVEL | ( | displayLevel, | |
l, | |||
... ) |
#define LOCALDISPLAYUPDATE | ( | displayLevel, | |
l, | |||
... ) |
#define MAP_EMPTY_VALUE ((U32)-1) |
#define ZDICT_STATIC_LINKING_ONLY |
typedef struct COVER_map_pair_t_s COVER_map_pair_t |
typedef struct COVER_map_s COVER_map_t |
typedef struct COVER_tryParameters_data_s COVER_tryParameters_data_t |
Parameters for COVER_tryParameters().
void COVER_best_destroy | ( | COVER_best_t * | best | ) |
Call COVER_best_wait() and then destroy the COVER_best_t.
void COVER_best_finish | ( | COVER_best_t * | best, |
ZDICT_cover_params_t | parameters, | ||
COVER_dictSelection_t | selection ) |
Called when a thread finishes executing, whether on error or success. Decrements liveJobs and signals any waiting threads if liveJobs == 0. If this dictionary is the best so far, saves it and its parameters.
void COVER_best_init | ( | COVER_best_t * | best | ) |
Initialize the COVER_best_t.
void COVER_best_start | ( | COVER_best_t * | best | ) |
Called when a thread is about to be launched. Increments liveJobs.
void COVER_best_wait | ( | COVER_best_t * | best | ) |
Wait until liveJobs == 0.
size_t COVER_checkTotalCompressedSize | ( | const ZDICT_cover_params_t | parameters, |
const size_t * | samplesSizes, | ||
const BYTE * | samples, | ||
size_t * | offsets, | ||
size_t | nbTrainSamples, | ||
size_t | nbSamples, | ||
BYTE *const | dict, | ||
size_t | dictBufferCapacity ) |
Checks total compressed size of a dictionary.
COVER_epoch_info_t COVER_computeEpochs | ( | U32 | maxDictSize, |
U32 | nbDmers, | ||
U32 | k, | ||
U32 | passes ) |
Computes the number of epochs and the size of each epoch. We will make sure that each epoch gets at least 10 * k bytes.
The COVER algorithms divide the data up into epochs of equal size and select one segment from each epoch.
maxDictSize | The maximum allowed dictionary size. |
nbDmers | The number of dmers we are training on. |
k | The parameter k (segment size). |
passes | The target number of passes over the dmer corpus. More passes means a better dictionary. |
COVER_dictSelection_t COVER_dictSelectionError | ( | size_t | error | ) |
Error function for COVER_selectDict function. Returns a struct where return.totalCompressedSize is a ZSTD error.
void COVER_dictSelectionFree | ( | COVER_dictSelection_t | selection | ) |
Always call this after COVER_selectDict() has been called, to free the memory used by the newly created dictionary.
unsigned COVER_dictSelectionIsError | ( | COVER_dictSelection_t | selection | ) |
Error function for COVER_selectDict function. Checks if the return value is an error.
COVER_dictSelection_t COVER_selectDict | ( | BYTE * | customDictContent, |
size_t | dictBufferCapacity, | ||
size_t | dictContentSize, | ||
const BYTE * | samplesBuffer, | ||
const size_t * | samplesSizes, | ||
unsigned | nbFinalizeSamples, | ||
size_t | nbCheckSamples, | ||
size_t | nbSamples, | ||
ZDICT_cover_params_t | params, | ||
size_t * | offsets, | ||
size_t | totalCompressedSize ) |
Called to finalize the dictionary and select one based on whether or not the shrink-dict flag was enabled. If enabled, the dictionary used is the smallest dictionary within a specified regression of the compressed size from the largest dictionary.
size_t COVER_sum | ( | const size_t * | samplesSizes, |
unsigned | nbSamples ) |
Returns the sum of the sample sizes.
void COVER_warnOnSmallCorpus | ( | size_t | maxDictSize, |
size_t | nbDmers, | ||
int | displayLevel ) |
Warns the user when their corpus is too small.
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover | ( | void * | dictBuffer, |
size_t | dictBufferCapacity, | ||
const void * | samplesBuffer, | ||
const size_t * | samplesSizes, | ||
unsigned | nbSamples, | ||
ZDICT_cover_params_t * | parameters ) |
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover | ( | void * | dictBuffer, |
size_t | dictBufferCapacity, | ||
const void * | samplesBuffer, | ||
const size_t * | samplesSizes, | ||
unsigned | nbSamples, | ||
ZDICT_cover_params_t | parameters ) |