Parolin 0.7.9 6796
Console (soon DLLs) to do a tar-like job
Loading...
Searching...
No Matches
cover.h File Reference
#include "../common/threading.h"
#include "../common/mem.h"
#include "../zdict.h"

Go to the source code of this file.

Data Structures

struct  COVER_best_s
 
struct  COVER_segment_t
 
struct  COVER_epoch_info_t
 
struct  COVER_dictSelection
 

Typedefs

typedef struct COVER_best_s COVER_best_t
 
typedef struct COVER_dictSelection COVER_dictSelection_t
 

Functions

COVER_epoch_info_t COVER_computeEpochs (U32 maxDictSize, U32 nbDmers, U32 k, U32 passes)
 
void COVER_warnOnSmallCorpus (size_t maxDictSize, size_t nbDmers, int displayLevel)
 
size_t COVER_checkTotalCompressedSize (const ZDICT_cover_params_t parameters, const size_t *samplesSizes, const BYTE *samples, size_t *offsets, size_t nbTrainSamples, size_t nbSamples, BYTE *const dict, size_t dictBufferCapacity)
 
size_t COVER_sum (const size_t *samplesSizes, unsigned nbSamples)
 
void COVER_best_init (COVER_best_t *best)
 
void COVER_best_wait (COVER_best_t *best)
 
void COVER_best_destroy (COVER_best_t *best)
 
void COVER_best_start (COVER_best_t *best)
 
void COVER_best_finish (COVER_best_t *best, ZDICT_cover_params_t parameters, COVER_dictSelection_t selection)
 
unsigned COVER_dictSelectionIsError (COVER_dictSelection_t selection)
 
COVER_dictSelection_t COVER_dictSelectionError (size_t error)
 
void COVER_dictSelectionFree (COVER_dictSelection_t selection)
 
COVER_dictSelection_t COVER_selectDict (BYTE *customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE *samplesBuffer, const size_t *samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t *offsets, size_t totalCompressedSize)
 

Typedef Documentation

◆ COVER_best_t

typedef struct COVER_best_s COVER_best_t

COVER_best_t is used for two purposes:

  1. Synchronizing threads.
  2. Saving the best parameters and dictionary.

All of the methods except COVER_best_init() are thread-safe if zstd is compiled with multithreaded support.

◆ COVER_dictSelection_t

typedef struct COVER_dictSelection COVER_dictSelection_t

Struct used for the dictionary selection function.

Function Documentation

◆ COVER_best_destroy()

void COVER_best_destroy ( COVER_best_t * best)

Call COVER_best_wait() and then destroy the COVER_best_t.

◆ COVER_best_finish()

void COVER_best_finish ( COVER_best_t * best,
ZDICT_cover_params_t parameters,
COVER_dictSelection_t selection )

Called when a thread finishes executing, whether on error or on success. Decrements liveJobs and signals any waiting threads if liveJobs == 0. If this dictionary is the best so far, save it and its parameters.

◆ COVER_best_init()

void COVER_best_init ( COVER_best_t * best)

Initialize the COVER_best_t.

◆ COVER_best_start()

void COVER_best_start ( COVER_best_t * best)

Called when a thread is about to be launched. Increments liveJobs.

◆ COVER_best_wait()

void COVER_best_wait ( COVER_best_t * best)

Wait until liveJobs == 0.

◆ COVER_checkTotalCompressedSize()

size_t COVER_checkTotalCompressedSize ( const ZDICT_cover_params_t parameters,
const size_t * samplesSizes,
const BYTE * samples,
size_t * offsets,
size_t nbTrainSamples,
size_t nbSamples,
BYTE *const dict,
size_t dictBufferCapacity )

Checks the total compressed size of a dictionary.

◆ COVER_computeEpochs()

COVER_epoch_info_t COVER_computeEpochs ( U32 maxDictSize,
U32 nbDmers,
U32 k,
U32 passes )

Computes the number of epochs and the size of each epoch. We will make sure that each epoch gets at least 10 * k bytes.

The COVER algorithms divide the data up into epochs of equal size and select one segment from each epoch.

Parameters
maxDictSize: The maximum allowed dictionary size.
nbDmers: The number of dmers we are training on.
k: The parameter k (segment size).
passes: The target number of passes over the dmer corpus. More passes means a better dictionary.

◆ COVER_dictSelectionError()

COVER_dictSelection_t COVER_dictSelectionError ( size_t error)

Error function for COVER_selectDict function. Returns a struct where return.totalCompressedSize is a ZSTD error.

◆ COVER_dictSelectionFree()

void COVER_dictSelectionFree ( COVER_dictSelection_t selection)

Always call this after COVER_selectDict() has been called, to free the memory used by the newly created dictionary.

◆ COVER_dictSelectionIsError()

unsigned COVER_dictSelectionIsError ( COVER_dictSelection_t selection)

Error function for COVER_selectDict function. Checks if the return value is an error.

◆ COVER_selectDict()

COVER_dictSelection_t COVER_selectDict ( BYTE * customDictContent,
size_t dictBufferCapacity,
size_t dictContentSize,
const BYTE * samplesBuffer,
const size_t * samplesSizes,
unsigned nbFinalizeSamples,
size_t nbCheckSamples,
size_t nbSamples,
ZDICT_cover_params_t params,
size_t * offsets,
size_t totalCompressedSize )

Called to finalize the dictionary and select one based on whether or not the shrink-dict flag was enabled. If enabled, the dictionary used is the smallest dictionary within a specified regression of the compressed size from the largest dictionary.

◆ COVER_sum()

size_t COVER_sum ( const size_t * samplesSizes,
unsigned nbSamples )

Returns the sum of the sample sizes.

◆ COVER_warnOnSmallCorpus()

void COVER_warnOnSmallCorpus ( size_t maxDictSize,
size_t nbDmers,
int displayLevel )

Warns the user when their corpus is too small.