Previous | Table of Contents | Next |

/************************* Start of ARITH-N.C **************************/ * * This is the order-n arithmetic coding module used in the final * part of chapter 6. * * Compile with BITIO.C. ERRHAND.C, and either MAIN-C.C OR MAIN-E.C. This * program should be compiled in large model. An even better alternative * is a DOS extender. * */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include "errhand.h" #include "bitio.h" /* * The SYMBOL structure is what is used to defined a symbol in * arithmetic coding terms. A symbol is defined as a range between * 0 and 1. Since we are using integer math, instead of using 0 and 1 * as our end points, we have an integer scale. The low_count and * high_count define where the symbol falls in the range. */ typedef struct { unsigned short int low_count; unsigned short int high_count; unsigned short int scale; } SYMBOL; #define MAXIMUM_SCALE 16383 /* Maximum allowed frequency count */ #define ESCAPE 256 /* The escape symbol */ #define DONE (-1) /* The output stream empty symbol */ #define FLUSH (-2) /* The symbol to flush the model */ /* * Function prototypes. */ #ifdef __STDC__ void initialize_options( int argc, char **argv ); int check_compression( FILE *input, BIT_FILE *output ); void initialize_model( void ); void update_model( int symbol ); int convert_int_to_symbol( int symbol, SYMBOL *s ); void get_symbol_scale( SYMBOL *s ); int convert_symbol_to_int( int count, SYMBOL *s ); void add_character_to_model( int c ); void flush_model( void ); void initialize_arithmetic_decoder( BIT_FILE *stream ) ; void remove_symbol_from_stream( BIT_FILE *stream, SYMBOL *s ); void initialize_arithmetic_encoder( void ); void encode_symbol( BIT_FILE *stream, SYMBOL *s ); void flush_arithmetic_encoder( BIT_FILE *stream ); short int get_current_count( SYMBOL *s ); #else void initialize_options(); int check_compression(); void initialize_model(); void update_model(); int convert_int_to_symbol(); void get_symbol_scale(); int convert_symbol_to_int(); void add_character_to_model(); void flush_model(); void initialize_arithmetic_decoder(); void remove_symbol_from_stream(); void initialize_arithmetic_encoder(); void encode_symbol(); void flush_arithmetic_encoder(); short int get_current_count(); #endif char *CompressionName = "Adaptive order n model with arithmetic coding"; char *Usage = "in-file out-file [ -o order ]\n\n"; int max_order = 3; /* * The main procedure is similar to the main found in ARITH1E.C. It has * to initialize the coder and the model. It then sits in a loop reading * input symbols and encoding them. One difference is that every 256 * symbols a compression check is performed. If the compression ratio * falls below 10%, a flush character is encoded. This flushes the encod * ing model, and will cause the decoder to flush its model when the * file is being expanded. The second difference is that each symbol is * repeatedly encoded until a successful encoding occurs. When trying to * encode a character in a particular order, the model may have to * transmit an ESCAPE character. If this is the case, the character has * to be retransmitted using a lower order. This process repeats until a * successful match is found of the symbol in a particular context. * Usually this means going down no further than the order -1 model. * However, the FLUSH and DONE symbols drop back to the order -2 model. * */ void CompressFile( input, output, argc, argv ) FILE *input; BIT_FILE *output; int argc; char *argv[]; { SYMBOL s; int c; int escaped; int flush = 0; long int text_count = 0; initialize_options( argc, argv ); initialize_model(); initialize-arithmetic_encoder(); for ( ; ; ) { if ( ( ++text_count & 0x0ff ) == 0 ) flush = check_compression( input, output ); if ( !flush ) c = getc( input ); else c = FLUSH; if ( c == EOF ) c = DONE; do { escaped = convert_int_to_symbol( c, &s); encode_symbol( output, &s ); } while ( escaped ); if ( c == DONE ) break; if ( c == FLUSH ) { flush_model(); flush = 0; } update_model( c ); add_character_to_model( c ); } flush_arithmetic_encoder( output ); } /* * The main loop for expansion is very similar to the expansion * routine used in the simpler compression program, ARITH1E.C. The * routine first has to initialize the the arithmetic coder and the * model. The decompression loop differs in a couple of respect. * First of all, it handles the special ESCAPE character, by * removing them from the input bit stream but just throwing them * away otherwise. Secondly, it handles the special FLUSH character. * Once the main decoding loop is done, the cleanup code is called, * and the program exits. * */ void ExpandFile( input, output, argc, argv ) BIT_FILE *input; FILE *output; int argc; char *argv[]; { SYMBOL s; int c; int count; initialize_options( argc, argv ); initialize_model(); initialize_arithmetic_decoder( input ); for ( ; ; ) { do { get_symbol_scale( &s ); count = get_current_count( &s ); c = convert_symbol_to_int( count, &s ); remove_symbol_from_stream( input, &s ); } while ( c == ESCAPE ); if ( c == DONE ) break; if ( c != FLUSH ) putc( (char) c, output ); else flush_model(); update_model( c ); add_character_to_model( c ); } } /* * This routine checks for command line options. At present, the only * option being passed on the command line is the order. */ void initialize_options( argc, argv ) int argc; char *argv[]; { while ( argc–– > 0 ) { if ( strcmp( *argv, "-o" ) == 0 ) { argc––; max_order = atoi( *++argv ); } else printf( "Uknown argument on command line: %s\n", *argv ); argc––; argv++; } } /* * This routine is called once every 256 input symbols. Its job is to * check to see if the compression ratio falls below 10%. If the * output size is 90% of the input size, it means not much compression * is taking place, so we probably ought to flush the statistics in the * model to allow for more current statistics to have greater impact. * This heuristic approach does seem to have some effect. */ int check_compression( input, output ) FILE *input; BIT_FILE *output; { static long local_input_marker = 0L; static long local_output_marker = 0L; long total_input_bytes; long total_output_bytes; int local_ratio; total_input_bytes = ftell( input ) - local_input_marker; total_output_bytes = ftell( output->file ); total_output_bytes -= local_output_marker; if ( total_output_bytes == 0 ) total_output_bytes = 1; local_ratio = (int)( ( total_output_bytes * 100 ) / total_input_bytes ); local_input_marker = ftell( input ); local_output_marker = ftell( output->file ); return( local_ratio > 90 ); } /* * * The next few routines contain all of the code and data used to * perform modeling for this program. This modeling unit keeps track * of all contexts from 0 up to max_order, which defaults to 3. In * addition, there is a special context -1 which is a fixed model used * to encode previously unseen characters, and a context -2 which is * used to encode EOF and FLUSH messages. * * Each context is stored in a special CONTEXT structure, which is * documented below. Context tables are not created until the context * is seen, and they are never destroyed. * */ /* * A context table contains a list of the counts for all symbols * that have been seen in the defined context. For example, a * context of "Zor" might have only had 2 different characters * appear. 't' might have appeared 10 times, and '1' might have * appeared once. These two counts are stored in the context * table. The counts are stored in the STATS structure. All of * the counts for a given context are stored in and array of STATS. * As new characters are added to a particular contexts, the STATS * array will grow. Sometimes the STATS array will shrink * after flushing the model. */ typedef struct { unsigned char symbol; unsigned char counts; } STATS; /* * Each context has to have links to higher order contexts. These * links are used to navigate through the context tables. For example, * to find the context table for "ABC", I start at the order 0 table, * then find the pointer to the "A" context table by looking through * the LINKS array. At that table, we find the "B" link and go to * that table. The process continues until the destination table is * found. The table pointed to by the LINKS array corresponds to the * symbol found at the same offset in the STATS table. The reason that * LINKS is in a separate structure instead of being combined with * STATS is to save space. All of the leaf context nodes don't need * next pointers, since they are in the highest order context. In the * leaf nodes, the LINKS array is a NULL pointer. */ typedef struct { struct context *next; } LINKS; /* * The CONTEXT structure holds all of the known information about * a particular context. The links and stats pointers are discussed * immediately above here. The max_index element gives the maximum * index that can be applied to the stats or link array. When the * table is first created, and stats is set to NULL, max_index is set * to -1. As soon as single element is added to stats, max_index is * incremented to 0. * * The lesser context pointer is a navigational aid. It points to * the context that is one less than the current order. For example, * if the current context is "ABC", the lesser_context pointer will * point to "BC". The reason for maintaining this pointer is that * this particular bit of table searching is done frequently, but * the pointer only needs to be built once, when the context is * created. */ typedef struct context { int max_index; LINKS *links; STATS *stats; struct context *lesser_context; } CONTEXT; /* * *contexts[] is an array of current contexts. If I want to find * the order 0 context for the current state of the model. I just * look at contexts[0]. This array of context pointers is set up * every time the model is updated. */ CONTEXT **contexts; /* * current_order contains the current order of the model. It starts * at max_order, and is decremented every time an ESCAPE is sent. It * will only go down to -1 for normal symbols, but can go to -2 for * EOF and FLUSH. */ int current_order; /* * This table contains the cumulative totals for the current context. * Because this program is using exclusion, totals has to be calculated * every time a context is used. The scoreboard array keeps track of * symbols that have appeared in higher order models, so that they * can be excluded from lower order context total calculations. */ short int totals[ 258 ]; char scoreboard[ 256 ]; /* * Local procedure declarations for modeling routines. */ #ifdef __STDC__ void update_table( CONTEXT *table, int symbol ); void rescale_table( CONTEXT *table ); void totalize_table( CONTEXT *table ); CONTEXT *shift_to_next_context( CONTEXT *table, int c, int order ); CONTEXT *allocate_next_order_table( CONTEXT *table, int symbol, CONTEXT *lesser_context ); void recursive_flush( CONTEXT *table ); #else void update_table(); void rescale_table(); void totalize_table(); CONTEXT *shift_to_next_context(); CONTEXT *allocate_next_order_table(); void recursive_flush(); #endif /* * This routine has to get everything set up properly so that * the model can be maintained properly. The first step is to create * the *contexts[] array used later to find current context tables. * The *contexts[] array indices go from -2 up to max_order, so * the table needs to be fiddled with a little. This routine then * has to create the special order -2 and order -1 tables by hand, * since they aren't quite like other tables. Then the current * context is set to \0, \0, \0, ... and the appropriate tables * are built to support that context. The current order is set * to max_order, the scoreboard is cleared, and the system is * ready to go. */ void initialize_model() { int i; CONTEXT *null_table; CONTEXT *control_table; current_order = max_order; contexts = (CONTEXT **) calloc( sizeof( CONTEXT * ), 10 ); if ( contexts == NULL ) fatal_error( "Failure #1: allocating context table!" ); context += 2; null_table = (CONTEXT *) calloc( sizeof( CONTEXT ), 1 ); if ( null_table == NULL ) fatal_error( "Failure #2: allocating null table!" ); null_table->max_index = -1; contexts[ -1 ] = null_table; for ( i = 0 ; i <= max_order ; 1++ ) contexts[ i ] = allocate_next_order_table( contexts[ i-1 ], 0, contexts[ i-1 ] ); free( (char *) null_table->stats ); null_table->stats = (STATS &) calloc( sizeof( STATS ), 256 ); if ( null_table->stats == NULL ) fatal_error( "Failure #3: allocating null table!" ); null_table->max_index = 255; for ( i=0 ; i < 256 ; i++ ) { null_table->stats[ i ].symbol = (unsigned char) i; null_table->stats[ i ].counts = 1; } control_table = (CONTEXT *) calloc( sizeof(CONTEXT), 1 ); if ( control_table == NULL ) fatal_error( "Failure #4: allocating null table!" ); control_table->stats = (STATS *) calloc( sizeof( STATS ), 2 ); if ( control_table->stats == NULL ) fatal_error( "Failure #5: allocating null table!" ); contexts[ -2 ] = control_table; control_table->max_index = 1; control_table->stats[ 0 ].symbol = -FLUSH; control_table->stats[ 0 ].counts = 1; control_table->stats[ 1 ].symbol =– DONE; control_table->stats[ 1 ].counts = 1; for ( i = 0 ; i < 256 ; i++ ) scoreboard[ i ] = 0; } /* * This is a utility routine used to create new tables when a new * context is created. It gets a pointer to the current context, * and gets the symbol that needs to be added to it. It also needs * a pointer to the lesser context for the table that is to be * created. For example, if the current context was "ABC", and the * symbol 'D' was read in, add_character_to_model would need to * create the new context "BCD". This routine would get called * with a pointer to "BC", the symbol 'D', and a pointer to context * "CD". This routine then creates a new table for "BCD", adds it * to the link table for "BC", and gives "BCD" a back pointer to * "CD". Note that finding the lesser context is a difficult * task, and isn't done here. This routine mainly worries about * modifying the stats and links fields in the current context. */ CONTEXT *allocate_next_order_table( table, symbol, lesser_context ) CONTEXT *table; int symbol; CONTEXT *lesser_context; { CONTEXT *new_table; int i; unsigned int new_size; for ( i = 0 ; i <= table->max_index ; i++ ) if (table->stats[ i ].symbol == (unsigned char) symbol ) break; if ( i > table->max_index ) { table->max_index++; new_size = sizeof( LINKS ); new_size *= table->max_index + 1; if ( table->links == NULL ) table->links = (LINKS *) calloc( new_size, 1 ); else table->links = (LINKS *) realloc( (char *) table->links, new_size ); new_size = sizeof( STATS ); new_size *= table->max_index + 1; if ( table->stats == NULL ) table->stats = (STATS *) calloc( new_size, 1 ); else table->stats = (STATS *) realloc( (char *) table->stats, new_size ); if ( table->links == NULL ) fatal_error( "Failure #6: allocating new table" ); if ( table->stats == NULL ) fatal_error( "Failure #7: allocating new table" ); table->stats[ i ].symbol = (unsigned char) symbol; table->stats[ i ].counts = 0; } new_table = (CONTEXT *) calloc(sizeof( CONTEXT ), 1 ); if ( new_table == NULL ) fatal_error( "Failure #8: allocating new table" ); new_table->max_index = -1; table->links[ i ].next = new_table; new_table->lesser_context = lesser_context; return( new_table ); } /* * This routine is called to increment the counts for the current * contexts. It is called after a character has been encoded or * decoded. All it does is call update_table for each of the * current contexts, which does the work of incrementing the count. * This particular version of update_model() practices update exclusion, * which means that if lower order models weren't used to encode * or decode the character, they don't get their counts updated. * This seems to improve compression performance quite a bit. * To disable update exclusion, the loop would be changed to run * from 0 to max_order, instead of current_order to max_order. */ void update_model( symbol ) int symbol; { int i; int local_order; if ( current_order < 0 ) local_order = 0; else local_order = current_order; if ( symbol >= 0 ) { while ( local_order <= max_order ) { if ( symbol >= 0 ) update_table( contexts[ local_order ], symbol ); local_order++; } } current_order = max_order; for ( i = 0 ; i < 256 ; i++ ) scoreboard[ i ] = 0; } /* * This routine is called to update the count for a particular symbol * in a particular table. The table is one of the current contexts, * and the symbol is the last symbol encoded or decoded. In principle * this is a fairly simple routine, but a couple of complications make * things a little messier. First of all, the given table may not * already have the symbol defined in its statistics table. If it * doesn't, the stats table has to grow and have the new guy added * to it. Secondly, the symbols are kept in sorted order by count * in the table so that the table can be trimmed during the flush * operation. When this symbol is incremented, it might have to be moved * up to reflect its new rank. Finally, since the counters are only * bytes, if the count reaches 255, the table absolutely must be rescaled * to get the counts back down to a reasonable level. */ void update_table( table, symbol ) CONTEXT *table; int symbol; { int i; int index; unsigned char temp; CONTEXT *temp_ptr; unsigned int new_size; /* * First, find the symbol in the appropriate context table. The first * symbol in the table is the most active, so start there. */ index = 0; while ( index <= table->max_index && table->stats[index].symbol != (unsigned char) symbol ) index++; if ( index > table->max_index ) { table->max_index++; new_size = sizeof( LINKS ); new_size *= table->max_index + 1; if ( current_order < max_order ) { if ( table->max_index == 0 ) table->links - (LINKS *) calloc( new_size, 1 ); else table->links = (LINKS *) realloc( (char *) table->links, new_size ); if ( table->links == NULL ) fatal_error( "Error #9: reallocating table space!" ); table->links[ index ].next = NULL; } new_size = sizeof( STATS ); new_size *= table->max_index + 1; if (table->max_index==0) table->stats = (STATS *) calloc( new_size, 1 ); else table->stats = (STATS *) realloc( (char *) table->stats, new_size ); if ( table->stats == NULL ) fatal_error( "Error #10: reallocating table space!" ); table->stats[ index ].symbol = (unsigned char) symbol; table->stats[ index ].counts = 0; } /* * Now I move the symbol to the front of its list. */ i = index; while ( i > 0 && table->stats[ index ]. counts == table->stats[ i-1 ].counts ) i--; if ( i != index ) { temp = table->stats[ index ].symbol; table->stats[ index ].symbol = table->stats[ i ].symbol; table->stats[ i ].symbol = temp; if ( table->links != NULL ) { temp_ptr = table->links[ index ].next; table->links[ index ].next = table->links[ i ].next; table->links[ i ].next = temp_ptr; } index = 1; } /* * The switch has been performed, now I can update the counts */ table->stats[ index ].counts++; if ( table->stats[ index ].counts == 255 ) rescale_table( table ); } /* * This routine is called when a given symbol needs to be encoded. * It is the job of this routine to find the symbol in the context * table associated with the current table, and return the low and * high counts associated with that symbol, as well as the scale. * Finding the table is simple. Unfortunately, once I find the table, * I have to build the table of cumulative counts, which is * expensive, and is done elsewhere. If the symbol is found in the * table, the appropriate counts are returned. If the symbol is * not found, the ESCAPE symbol probabilities are returned, and * the current order is reduced. Note also the kludge to support * the order -2 character set, which consists of negative numbers * instead of unsigned chars. This insures that no match will ever * be found for the EOF or FLUSH symbols in any "normal" table. */ int convert_int_to_symbol( c, s ) int c; SYMBOL *s; { int i; CONTEXT *table; table = contexts[ current_order ]; totalize_table( table ); s->scale = totals[ 0 ]; if ( current_order == –2 ) c = -c; for ( i = 0 ; i <= table->max_index ; i++ ) { if ( c == (int) table->stats[ i ].symbol ) { if ( table->stats[ i ].counts == 0 ) break; s->low_count = totals[ i+2 ]; s->high_count = totals[ i+1 ]; return( 0 ); } } s->low_count = totals[ 1 ]; s->high-count = totals[ 0 ]; current_order––; return( 1 ); } /* * This routine is called when decoding an arithmetic number. In * order to decode the present symbol, the current scale in the * model must be determined. This requires looking up the current * table, then building the totals table. Once that is done, the * cumulative total table has the symbol scale at element 0. */ void get_symbol_scale( s) SYMBOL *s; { CONTEXT *table; table = contexts[ current_order ]; totalize_table( table ); s->scale = totals[ 0 ]; } /* * This routine is called during decoding. It is given a count that * came out of the arithmetic decoder, and has to find the symbol that * matches the count. The cumulative totals are already stored in the * totals[] table, from the call to get_symbol-scale, so this routine * just has to look through that table. Once the match is found, * the appropriate character is returned to the caller. Two possible * complications. First, the character might be the ESCAPE character, * in which case the current_order has to be decremented. The other * complication. First, the character might be the ESCAPE character, * in which case the current_order has to be decremented. The other * complication is that the order might be -2, in which case we return * the negative of the symbol so it isn't confused with a normal * symbol. */ int convert_symbol_to_int( count, s ) int count; SYMBOL *s; { int c; CONTEXT *table; table - contexts[ current_order ]; for ( c = 0; count < totals[ c ] ; c++ ) ; s->high_count = totals[ c - 1 ]; s->low_count = totals[ c ]: if ( c == 1 ) { current_order––; return( ESCAPE ); } if ( current_order < -1 ) return( (int) -table->stats[ c-2 ].symbol ); else return( table->stats[ c-2 ].symbol ); } /* * After the model has been updated for a new character, this routine * is called to "shift" into the new context. For example, if the * last context was "ABC", and the symbol 'D' had just been processed, * this routine would want to update the context pointers to that * context[1]=="D", contexts[2]=="CD" and contexts[3]=="BCD". The * potential problem is that some of these tables may not exist. * The way this is handled is by the shift_to_next_context routine. * It is passed a pointer to the "ABC" context, along with the symbol * 'D', and its job is to return a pointer to "BCD". Once we have * "BCD", we can follow the lesser context pointers in order to get * the pointers to "CD" and "C". The hard work was done in * shift_to_context(). */ void add_character_to_model( c ) int c; { int i; if ( max_order < 0 || c < 0 ) return; contexts[ max_order ] = shift_to_next_context( contexts[ max_order ], c, max_order ); for ( i = max_order-1 ; i > 0 ; i–– ) contexts[ i ] = contexts[ i+1 ]->lesser_context; } /* * This routine is called when adding a new character to the model. From * the previous example, if the current context was "ABC", and the new * symbol was 'D', this routine would get called with a pointer to * context table "ABC", and symbol 'D', with order max_order. What this * routine needs to do then is to find the context table "BCD". This * should be an easy job, and it is if the table already exists. All * we have to in that case is follow the back pointer from "ABC" to "BC". * We then search the link table of "BC" until we find the link to "D". * That link points to "BCD", and that value is then returned to the * caller. The problem crops up when "BC" doesn't have a pointer to * "BCD". This generally means that the "BCD" context has not appeared * yet. When this happens, it means a new table has to be created and * added to the "BC" table. That can be done with a single call to * the allocate_new_table routine. The only problem is that the * allocate_new_table routine wants to know what the lesser context for * the new table is going to be. In other words, when I create "BCD", * I need to know where "CD" is located. In order to find "CD", I * have to recursively call shift_to_next_context, passing it a pointer * to context "C" and the symbol 'D'. It then returns a pointer to * "CD", which I use to create the "BCD" table. The recursion is * guaranteed to end if it ever gets to order -1, because the null table * is guaranteed to have a link for every symbol to the order 0 table. * This is the most complicated part of the modeling program, but it is * necessary for performance reasons. */ CONTEXT *shift_to_next_context( table, c, order ) CONTEXT *table; int c; int order; { int i; CONTEXT *new_lesser; /* * First, try to find the new context by backing up to the lesser * context and searching its link table. If I find the link, we take * a quick and easy exit, returning the link. Note that there is a * special kludge for context order 0. We know for a fact that * the lesser context pointer at order 0 points to the null table, * order -1, and we know that the -1 table only has a single link * pointer, which points back to the order 0 table. */ table = table->lesser_context; if ( order == 0 ) return( table->links[ 0 ].next ); for ( i = 0 ; i <= table->max_index ; i++ ) if ( table->stats[ i ].symbol == (unsigned char) c ) if ( table->links[ i ].next != NULL) return( table->links[ i ].next ); else break; /* * If I get here, it means the new context did not exist. I have to * create the new context, add a link to it here, and add the backwards * link to *his* previous context. Creating the table and adding it to * this table is pretty easy, but adding the back pointer isn't. Since * creating the new back pointer isn't easy, I duck my responsibility * and recurse to myself in order to pick it up. */ new_lesser = shift_to_next_context( table, c, order-1 ); /* * Now that I have the back pointer for this table, I can make a call * to a utility to allocate the new table. */ table = allocate_next_order_table( table, c, new_lesser ); return( table ); } /* * Rescaling the table needs to be done for one of three reasons. * First, if the maximum count for the table has exceeded 16383, it * means that arithmetic coding using 16 and 32 bit registers might * no longer work. Secondly, if an individual symbol count has * reached 255, it will no longer fit in a byte. Third, if the * current model isn't compressing well, the compressor program may * want to rescale all tables in order to give more weight to newer * statistics. All this routine does is divide each count by 2. * If any counts drop to 0, the counters can be removed from the * stats table, but only if this is a leaf context. Otherwise, we * might cut a link to a higher order table. */ void rescale_table( table ) CONTEXT *table; { int i; if ( table->max_index == -1 ) return; for ( i = 0 ; i <= table->max_index ; i ++ ) table->stats[ i ].counts /= 2; if ( table->stats[ table]>max_index ].counts == 0 && table->links == NULL ) { while ( table->stats[ table->max_index ].counts == 0 && table->max_index >= 0 ) table->max_index––; if ( table->max_index == -1 ) { free( (char *) table->stats ); table->stats = NULL; } else { table->stats = (STATS *) realloc( (char *) table->stats, sizeof( STATS ) * ( table->max_index + 1 ) ); if ( table->stats == NULL ) fatal_error( "Error #11: reallocating stats space!" ); } } } /* * This routine has the job of creating a cumulative totals table for * a given context. The cumulative low and high for symbol c are going to * be stored in totals[c+2] and totals[c+1]. Locations 0 and 1 are * reserved for the special ESCAPE symbol. The ESCAPE symbol * count is calculated dynamically, and changes based on what the * current context looks like. Note also that this routine ignores * any counts for symbols that have already shown up in the scoreboard, * and it adds all new symbols found here to the scoreboard. This * allows us to exclude counts of symbols that have already appeared in * higher order contexts, improving compression quite a bit. */ void totalize_table( table ) CONTEXT *table; { int i; unsigned char max; for ( ; ; ) { max = 0; i = table->max_index + 2; totals[ i ] = 0; for ( ; i > 1 ; i- ) { totals[ i-1 ] = totals[ i ]; if ( table->stats[ i-2 ].counts ) if ( ( current_order == -2 ) || scoreboard[ table->stats[ i-2 ].symbol ] == 0 ) totals[ i-1 ] += table->stats[ i-2].counts; if ( table->stats[ i-2 ].counts > max ) max = table->stats[ i-2 ].counts; } /* * Here is where the escape calculation needs to take place. */ if ( max == 0 ) totals[ 0 ] = 1; else { totals[ 0 ] = (short int) ( 256 - table->max_index ); totals[ 0 ] *= table->max_index; totals[ 0 ] /= 256; totals[ 0 ] /= max; totals[ 0 ]++; totals[ 0 ] += totals[ 1 ]; } if ( totals[ 0 ] < MAXIMUM_SCALE ) break; rescale_table( table ); } for ( i = 0 ; i < table->max_index ; i++ ) if (table->stats[i].counts != 0) scoreboard[ table->stats[ i ].symbol ] = 1; } /* * This routine is called when the entire model is to be flushed. * This is done in an attempt to improve the compression ratio by * giving greater weight to upcoming statistics. This routine * starts at the given table, and recursively calls itself to * rescale every table in its list of links. The table itself * is then rescaled. */ void recursive_flush( table ) CONTEXT *table; { int i; if ( table->links != NULL ) for ( i = 0 ; i <= table->max_index ; i++ ) if ( table->links[ i ].next != NULL ) recursive_flush( table->links[ i ].next ); rescale_table( table ); } /* * This routine is called to flush the whole table, which it does * by calling the recursive flush routine starting at the order 0 * table. */ void flush_model() { putc( 'F', stdout ); recursive_flush( contexts[ 0 ] ); } /* * Everything from here down define the arithmetic coder section * of the program. */ */ * These four variables define the current state of the arithmetic * coder/decoder. They are assumed to be 16 bits long. Note that * by declaring them as short ints, they will actually be 16 bits * on most 80X86 and 680X0 machines, as well as VAXen. */ static unsigned short int code;/* The present input code value */ static unsigned short int low; /* Start of the current code range */ static unsigned short int high;/* End of the current code range */ long underflow_bits; /* Number of underflow bits pending */ /* * This routine must be called to initialize the encoding process. * The high register is initialized to all 1s, and it is assumed that * it has an infinite string of 1s to be shifted into the lower bit * positions when needed. */ void initialize_arithmetic_encoder() { low = 0; high = 0xffff; underflow_bits = 0; } /* * At the end of the encoding process, there are still significant * bits left in the high and low registers. We output two bits, * plus as many underflow bits as are necessary. */ void flush_arithmetic_encoder( stream ) BIT_FILE *stream; { OutputBit( stream, low & 0x4000 ); underflow_bits++; while ( underflow_bits-- > 0 ) OutputBit( stream, ~low & 0X4000 ); OutputBits( stream, 0L, 16 ); } /* * This routine is called to encode a symbol. The symbol is passed * in the SYMBOL structure as a low count, a high count, and a range, * instead of the more conventional probability ranges. The encoding * process takes two steps. First, the values of high and low are * updated to take into account the range restriction created by the * new symbol. Then, as many bits as possible are shifted out to * the output stream. Finally, high and low are stable again and * the routine returns. */ void encode_symbol( stream, s ) BIT_FILE *stream; SYMBOL *s; { long range; /* * These three lines rescale high and low for the new symbol. */ range = (long) ( high-low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale -1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale ); /* * This loop turns out new bits until high and low are far enough * apart to have stabilized. */ for ( ; ; ) { /* * If this test passes, it means that the MSDigits match, and can * be sent to the output stream. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { OutputBit( stream, high & 0x8000 ); while ( underflow_bits > 0 ) { OutputBit( stream, ~high & 0x8000 ); underflow_bits--; } } /* * If this test passes, the numbers are in danger of underflow, because * the MSDigits don't match, and the 2nd digits are just one apart. */ else if ( ( low & 0x4000 ) && !( high & 0x4000 )) { underflow_bits += 1; low &= 0x3fff; high |= 0x4000; } else return ; low <<= 1; high <<= 1; high |= 1; } } /* * When decoding, this routine is called to figure out which symbol * is presently waiting to be decoded. This routine expects to get * the current model scale in the s->scale parameter, and it returns * a count that corresponds to the present floating point code; * * code = count / s->scale */ short int get_current_count( s ) SYMBOL *s; { long range; short int count; range = (long) ( high - low ) + 1; count = (short int) ((((long) ( code - low ) + 1 ) * s->scale-1 ) / range ); return( count ); } /* * This routine is called to initialize the state of the arithmetic * decoder. This involves initializing the high and low registers * to their conventional starting values, plus reading the first * 16 bits from the input stream into the code value. */ void initialize_arithmetic_decoder( stream ) BIT_FILE *stream; { int i; code = 0; for ( i = 0 ; i < 16 ; i++ ) { code <<= 1; code += InputBit( stream ); } low = 0; high = 0xffff; } /* * Just figuring out what the present symbol is doesn't remove * it from the input bit stream. After the character has been * decoded, this routine has to be called to remove it from the * input stream. */ void remove_symbol_from_stream( stream, s ) BIT_FILE *stream; SYMBOL *s; { long range; /* * First, the range is expanded to account for the symbol removal. */ range = (long)( high - low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale -1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale ); /* * Next, any possible bits are shipped out. */ for ( ; ; ) { /* * If the MSDigits match, the bits will be shifted out. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { } /* * Else, if underflow is threatening, shift out the 2nd MSDigit. */ else if ((low & 0x4000) == 0x4000 && (high & 0x4000) == 0 ) { code ^= 0x4000; low &= 0x3fff; high |= 0x4000; } else /* * Otherwise, nothing can be shifted out, so I return. */ return; low <<= 1; high <<= 1; high |= 1; code <<= 1; code += InputBit( stream ); } } /************************** End of ARITH-N.C ***************************/

Previous | Table of Contents | Next |