Parolin 0.7.9 6796
Console (soon DLLs) to do a tar-like job
Loading...
Searching...
No Matches
cover.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "../common/mem.h"
#include "../common/pool.h"
#include "../common/threading.h"
#include "../common/zstd_internal.h"
#include "../common/bits.h"
#include "../zdict.h"
#include "cover.h"

Data Structures

struct  COVER_map_pair_t_s
 
struct  COVER_map_s
 
struct  COVER_ctx_t
 
struct  COVER_tryParameters_data_s
 

Macros

#define ZDICT_STATIC_LINKING_ONLY
 
#define COVER_MAX_SAMPLES_SIZE   (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
 
#define COVER_DEFAULT_SPLITPOINT   1.0
 
#define DISPLAY(...)
 
#define LOCALDISPLAYLEVEL(displayLevel, l, ...)
 
#define DISPLAYLEVEL(l, ...)
 
#define LOCALDISPLAYUPDATE(displayLevel, l, ...)
 
#define DISPLAYUPDATE(l, ...)
 
#define MAP_EMPTY_VALUE   ((U32)-1)
 

Typedefs

typedef struct COVER_map_pair_t_s COVER_map_pair_t
 
typedef struct COVER_map_s COVER_map_t
 
typedef struct COVER_tryParameters_data_s COVER_tryParameters_data_t
 

Functions

size_t COVER_sum (const size_t *samplesSizes, unsigned nbSamples)
 
void COVER_warnOnSmallCorpus (size_t maxDictSize, size_t nbDmers, int displayLevel)
 
COVER_epoch_info_t COVER_computeEpochs (U32 maxDictSize, U32 nbDmers, U32 k, U32 passes)
 
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t parameters)
 
size_t COVER_checkTotalCompressedSize (const ZDICT_cover_params_t parameters, const size_t *samplesSizes, const BYTE *samples, size_t *offsets, size_t nbTrainSamples, size_t nbSamples, BYTE *const dict, size_t dictBufferCapacity)
 
void COVER_best_init (COVER_best_t *best)
 
void COVER_best_wait (COVER_best_t *best)
 
void COVER_best_destroy (COVER_best_t *best)
 
void COVER_best_start (COVER_best_t *best)
 
void COVER_best_finish (COVER_best_t *best, ZDICT_cover_params_t parameters, COVER_dictSelection_t selection)
 
COVER_dictSelection_t COVER_dictSelectionError (size_t error)
 
unsigned COVER_dictSelectionIsError (COVER_dictSelection_t selection)
 
void COVER_dictSelectionFree (COVER_dictSelection_t selection)
 
COVER_dictSelection_t COVER_selectDict (BYTE *customDictContent, size_t dictBufferCapacity, size_t dictContentSize, const BYTE *samplesBuffer, const size_t *samplesSizes, unsigned nbFinalizeSamples, size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t *offsets, size_t totalCompressedSize)
 
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover (void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, ZDICT_cover_params_t *parameters)
 

Macro Definition Documentation

◆ COVER_DEFAULT_SPLITPOINT

#define COVER_DEFAULT_SPLITPOINT   1.0

◆ COVER_MAX_SAMPLES_SIZE

#define COVER_MAX_SAMPLES_SIZE   (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))

Samples are referenced with 32-bit indexes, so the total samples size is limited to 4 GB on 64-bit builds. For 32-bit builds we choose 1 GB: most 32-bit platforms have 2 GB of user-mode addressable space and we allocate a large contiguous buffer, so 1 GB is already a high limit.

◆ DISPLAY

#define DISPLAY ( ...)
Value:
{ \
fprintf(stderr, __VA_ARGS__); \
fflush(stderr); \
}

◆ DISPLAYLEVEL

#define DISPLAYLEVEL ( l,
... )
Value:
LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
#define LOCALDISPLAYLEVEL(displayLevel, l,...)
Definition cover.c:66

◆ DISPLAYUPDATE

#define DISPLAYUPDATE ( l,
... )
Value:
LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
#define LOCALDISPLAYUPDATE(displayLevel, l,...)
Definition cover.c:78

◆ LOCALDISPLAYLEVEL

#define LOCALDISPLAYLEVEL ( displayLevel,
l,
... )
Value:
if (displayLevel >= l) { \
DISPLAY(__VA_ARGS__); \
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */

◆ LOCALDISPLAYUPDATE

#define LOCALDISPLAYUPDATE ( displayLevel,
l,
... )
Value:
if (displayLevel >= l) { \
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
g_time = clock(); \
DISPLAY(__VA_ARGS__); \
} \
}

◆ MAP_EMPTY_VALUE

#define MAP_EMPTY_VALUE   ((U32)-1)

◆ ZDICT_STATIC_LINKING_ONLY

#define ZDICT_STATIC_LINKING_ONLY

Typedef Documentation

◆ COVER_map_pair_t

◆ COVER_map_t

typedef struct COVER_map_s COVER_map_t

◆ COVER_tryParameters_data_t

Parameters for COVER_tryParameters().

Function Documentation

◆ COVER_best_destroy()

void COVER_best_destroy ( COVER_best_t * best)

Call COVER_best_wait() and then destroy the COVER_best_t.

◆ COVER_best_finish()

void COVER_best_finish ( COVER_best_t * best,
ZDICT_cover_params_t parameters,
COVER_dictSelection_t selection )

Called when a thread finishes executing, on both error and success. Decrements liveJobs and signals any waiting threads if liveJobs == 0. If this dictionary is the best so far, saves it and its parameters.

◆ COVER_best_init()

void COVER_best_init ( COVER_best_t * best)

Initialize the COVER_best_t.

◆ COVER_best_start()

void COVER_best_start ( COVER_best_t * best)

Called when a thread is about to be launched. Increments liveJobs.

◆ COVER_best_wait()

void COVER_best_wait ( COVER_best_t * best)

Wait until liveJobs == 0.

◆ COVER_checkTotalCompressedSize()

size_t COVER_checkTotalCompressedSize ( const ZDICT_cover_params_t parameters,
const size_t * samplesSizes,
const BYTE * samples,
size_t * offsets,
size_t nbTrainSamples,
size_t nbSamples,
BYTE *const dict,
size_t dictBufferCapacity )

Checks the total compressed size of a dictionary.

◆ COVER_computeEpochs()

COVER_epoch_info_t COVER_computeEpochs ( U32 maxDictSize,
U32 nbDmers,
U32 k,
U32 passes )

Computes the number of epochs and the size of each epoch. We will make sure that each epoch gets at least 10 * k bytes.

The COVER algorithms divide the data up into epochs of equal size and select one segment from each epoch.

Parameters
maxDictSize: The maximum allowed dictionary size.
nbDmers: The number of dmers we are training on.
k: The parameter k (segment size).
passes: The target number of passes over the dmer corpus. More passes means a better dictionary.

◆ COVER_dictSelectionError()

COVER_dictSelection_t COVER_dictSelectionError ( size_t error)

Error function for COVER_selectDict function. Returns a struct where return.totalCompressedSize is a ZSTD error.

◆ COVER_dictSelectionFree()

void COVER_dictSelectionFree ( COVER_dictSelection_t selection)

Always call this after COVER_selectDict() has been called, to free the memory used by the newly created dictionary.

◆ COVER_dictSelectionIsError()

unsigned COVER_dictSelectionIsError ( COVER_dictSelection_t selection)

Error function for COVER_selectDict function. Checks if the return value is an error.

◆ COVER_selectDict()

COVER_dictSelection_t COVER_selectDict ( BYTE * customDictContent,
size_t dictBufferCapacity,
size_t dictContentSize,
const BYTE * samplesBuffer,
const size_t * samplesSizes,
unsigned nbFinalizeSamples,
size_t nbCheckSamples,
size_t nbSamples,
ZDICT_cover_params_t params,
size_t * offsets,
size_t totalCompressedSize )

Called to finalize the dictionary and select one based on whether or not the shrink-dict flag was enabled. If it was enabled, the dictionary used is the smallest dictionary within a specified regression of the compressed size from the largest dictionary.

◆ COVER_sum()

size_t COVER_sum ( const size_t * samplesSizes,
unsigned nbSamples )

Returns the sum of the sample sizes.

◆ COVER_warnOnSmallCorpus()

void COVER_warnOnSmallCorpus ( size_t maxDictSize,
size_t nbDmers,
int displayLevel )

Warns the user when their corpus is too small.

◆ ZDICT_optimizeTrainFromBuffer_cover()

ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover ( void * dictBuffer,
size_t dictBufferCapacity,
const void * samplesBuffer,
const size_t * samplesSizes,
unsigned nbSamples,
ZDICT_cover_params_t * parameters )

◆ ZDICT_trainFromBuffer_cover()

ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover ( void * dictBuffer,
size_t dictBufferCapacity,
const void * samplesBuffer,
const size_t * samplesSizes,
unsigned nbSamples,
ZDICT_cover_params_t parameters )