|
CWB
|
#include "../cl/globals.h"
#include "../cl/cl.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/storage.h"
#include "../cl/bitio.h"
#include "../cl/macros.h"
| void bprintf | ( | unsigned int | i, |
| int | width, | ||
| FILE * | stream | ||
| ) |
Prints a binary representation of an integer to a stream.
| i | Integer to print |
| width | Number of bits in the integer |
| stream | Where to print to. |
Referenced by compute_code_lengths().
compute_code_lengths(): Compresses the token stream of a p-attribute.
Three files are created: the compressed token stream, the descriptor block, and a sync file.
| attr | The attribute to compress. |
| hc | Location for the resulting Huffman code descriptor block. |
| fname | Base filename for the resulting files. |
References _Attribute::any, BFclose(), BFflush(), BFopen(), BFposition(), BFwriteWord(), bprintf(), CDA_OK, cderrno, cdperror, cl_calloc(), cl_cpos2id(), cl_malloc(), cl_max_cpos(), cl_max_id(), CL_MAX_LINE_LENGTH, CompCorpus, CompCorpusFreqs, CompHuffCodes, CompHuffSeq, CompHuffSync, CompLexicon, CompLexiconIdx, component_full_name(), corpus_id, do_protocol, ensure_component(), get_id_frequency, get_string_of_id, _huffman_code_descriptor::lcount, _huffman_code_descriptor::length, _huffman_code_descriptor::max_codelen, MAXCODELEN, _huffman_code_descriptor::min_code, _huffman_code_descriptor::min_codelen, NwriteInt(), print_heap(), protocol, TCorpus::registry_dir, TCorpus::registry_name, sift(), _huffman_code_descriptor::size, _huffman_code_descriptor::symbols, _huffman_code_descriptor::symindex, SYNCHRONIZATION, and WriteHCD().
Referenced by main().
| void decode_check_huff | ( | Attribute * | attr, |
| char * | fname | ||
| ) |
Checks a huffcoded attribute for errors by decompressing it.
This function assumes that compute_code_lengths() has been called beforehand and made sure that the _uncompressed_ token sequence is used by CL access functions.
| attr | The attribute to check. |
| fname | Base filename to use for the three compressed-attribute files. Can be NULL, in which case the filenames in the attribute are used. |
References _Attribute::any, BFclose(), BFflush(), BFopen(), BFposition(), BFread(), CDA_OK, cderrno, cl_cpos2id(), cl_max_cpos(), CL_MAX_LINE_LENGTH, CompCorpus, CompHuffCodes, CompHuffSeq, CompHuffSync, component_full_name(), corpus_id, _huffman_code_descriptor::length, _huffman_code_descriptor::min_code, NreadInt(), ReadHCD(), _huffman_code_descriptor::symbols, _huffman_code_descriptor::symindex, and SYNCHRONIZATION.
Referenced by main().
| void dump_heap | ( | int * | heap, |
| int | heap_size, | ||
| int | node, | ||
| int | indent | ||
| ) |
Dumps the specified heap of memory to the program output stream.
| heap | Location of the heap to dump. |
| heap_size | Number of nodes in the heap. |
| node | Node at which to begin dumping. |
| indent | How many tabs to indent the start of each line. |
References protocol.
Referenced by print_heap().
| void huffcode_usage | ( | char * | msg, |
| int | error_code | ||
| ) |
Prints a usage message and exits the program.
| msg | A message about the error. |
| error_code | Value to be returned by the program when it exits. |
References drop_corpus, progname, and VERSION.
Referenced by main().
| int main | ( | int | argc, |
| char ** | argv | ||
| ) |
Main function for cwb-huffcode.
| argc | Number of command-line arguments. |
| argv | Command-line arguments. |
References _Attribute::any, ATT_POS, TCorpus::attributes, central_corpus_directory, cl_delete_corpus(), cl_new_attribute, cl_new_corpus(), compute_code_lengths(), corpus_id, debug, decode_check_huff(), DEFAULT_ATT_NAME, do_protocol, huffcode_usage(), progname, protocol, and registry_directory.
| void print_heap | ( | int * | heap, |
| int | heap_size, | ||
| char * | title | ||
| ) |
Prints a description of the specified heap of memory to the program output stream.
| heap | Location of the heap to print. |
| heap_size | Number of nodes in the heap. |
| title | Title of the heap to print. |
References dump_heap(), node, and protocol.
Referenced by compute_code_lengths().
| int ReadHCD | ( | char * | filename, |
| HCD * | hc | ||
| ) |
Reads the code descriptor block of a Huffman-compressed sequence from file.
| filename | Path to file where compressed sequence is saved. |
| hc | Pointer to location where the sequence's descriptor block will be loaded to. |
References cl_malloc(), _huffman_code_descriptor::lcount, _huffman_code_descriptor::length, _huffman_code_descriptor::max_codelen, MAXCODELEN, _huffman_code_descriptor::min_code, _huffman_code_descriptor::min_codelen, NreadInt(), NreadInts(), _huffman_code_descriptor::size, _huffman_code_descriptor::symbols, and _huffman_code_descriptor::symindex.
Referenced by decode_check_huff().
| static int sift | ( | int * | heap, |
| int | heap_size, | ||
| int | node | ||
| ) | [static] |
Sifts the heap into order.
| heap | Location of the heap to sift. |
| heap_size | Number of nodes in the heap. |
| node | Node at which to begin sifting. |
Referenced by compute_code_lengths().
| int WriteHCD | ( | char * | filename, |
| HCD * | hc | ||
| ) |
Writes a Huffman code descriptor to file.
| filename | Path to file where descriptor is to be saved. |
| hc | Pointer to the descriptor block to save. |
References _huffman_code_descriptor::lcount, _huffman_code_descriptor::length, _huffman_code_descriptor::max_codelen, MAXCODELEN, _huffman_code_descriptor::min_code, _huffman_code_descriptor::min_codelen, NwriteInt(), NwriteInts(), _huffman_code_descriptor::size, _huffman_code_descriptor::symbols, and _huffman_code_descriptor::symindex.
Referenced by compute_code_lengths().
| char* corpus_id = NULL |
| int debug = 0 |
| int do_protocol = 0 |
Level of progress-info message output (including the compression protocol): 0 = none.
Referenced by compute_code_lengths(), and main().
| char* progname |
| FILE* protocol |
File handle for this program's progress-info output: always stdout.
Referenced by compute_code_lengths(), dump_heap(), main(), and print_heap().
1.7.3