Table of Contents


Appendix B
Test Programs

One of the problems I noticed when testing data compression programs for the Dr. Dobb’s programming contest of 1991 was that of inadequate testing. Many of the programs I was given failed on several of the test files in our compression database.

After good design, the best weapon to use against these kinds of errors is exhaustive testing. I have a test program I use under MS-DOS, named CHURN, which can apply a compression program to every file on a disk volume, performing compression and then decompression, and then comparing the input to the output. This has proven very helpful to me when testing the software in this book.

The MS-DOS version of the program is given below. Instructions on how to use this program are found in the program listing.

/**************************START OF CHURN.C****************************
*
*  This is a utility program used to test compression/decompression
*  programs for accuracy, speed, and compression ratios.  CHURN is
*  called with three arguments.  The first is a root directory.  CHURN
*  will attempt to compress  and then decompress every file in and under
*  the specified root directory.  The next parameter on the command
*  line is the compression command.  CHURN needs to compress the input
*  file to a file called TEST.CMP.  The compression command tells CHURN
*  how to do this.  CHURN will execute the compression command by
*  passing the command line to DOS using the system() function call.
*  It attempts to insert the file name into the compression command by
*  calling sprintf(), with the file name as an argument.  This means that
*  if the compression command has a %s anywhere in it, the name of the
*  input file should be substituted for it.  Finally, the third argument
*  on the command line should be the command  CHURN needs to spawn to
*  decompress TEST.CMP to TEST.OUT.
*
*  An example of how this works using programs created in this book
*  would look like this:
*
*  CHURN C:\ "LZSS-C %%s test.cmp" "LZSS-E test.cmp test.out"
*
* The doubled up % symbols are there to defeat variable substitution
* under some command-line interpreters, such as 4DOS.
*
*  A more complicated example testing PKZIP might look like this:
*
*  CHURN C:\ "TEST %%s"  "PKUNZIP TEST.CMP"
*
*  where TEST.BAT had two lines that look like this:
*
*  COPY %1 TEST.OUT
*  PKZIP -M TEST.CMP TEST.OUT
*
*  CHURN stores a summary of compression in a file called CHURN.LOG.  This
*  file could be used for further analysis by other programs.
*
*  To abort this program while it is running, don't start pounding away
*  on the BREAK or CTRL-C keys.  They will just get absorbed by the
*  compression program.  Instead, hit a single key, which will be detected
*  by CHURN, and used as an abort signal.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <process.h>
#include <conio.h>
#include <dos.h>

/*
*  The findfirst and findnext functions operate nearly identically
*  under TurboC and MSC.  The only difference is that the function
*  names, structures, and structure elements all have different names.
*  I just create macros for these things and redefine them appropriately
*  here.
*/

/*
 * Directory-search portability layer.  Each macro maps onto the Turbo C
 * (findfirst/findnext, struct ffblk) or Microsoft C (_dos_findfirst/
 * _dos_findnext, struct find_t) spelling of the same operation:
 *
 *   FILE_INFO            type of the search-state structure
 *   FIND_FIRST(n, i)     start a search for pattern n, filling *i
 *   FIND_NEXT(i)         continue the search described by *i
 *   FILE_IS_DIR(i)       nonzero if the entry in i is a subdirectory
 *   FILE_NAME(i)         name of the entry in i
 *
 * The directory attribute is passed to the search so that subdirectories
 * are returned along with ordinary files.
 */
#ifdef __TURBOC__

#include <dir.h>
#define FILE_INFO                     struct ffblk
#define FIND_FIRST( name, info )      findfirst( ( name ), ( info ), FA_DIREC )
#define FIND_NEXT( info )             findnext( ( info ) )
#define FILE_IS_DIR( info )           ( ( info ).ff_attrib & FA_DIREC )
#define FILE_NAME( info )             ( ( info ).ff_name )

#else

#define MSDOS 1
#define FILE_INFO                     struct find_t
/* Note the argument order differs from Turbo C: attribute comes second. */
#define FIND_FIRST( name, info )      _dos_findfirst( ( name ), _A_SUBDIR, \
                                                      ( info ) )
#define FIND_NEXT( info )             _dos_findnext( ( info ) )
#define FILE_IS_DIR( info )           ( ( info ).attrib & _A_SUBDIR )
#define FILE_NAME( info )             ( ( info ).name )

#endif

/*
*  Some global variables.
*/

/* Running tallies, updated by compress() and reported by main(). */
int total_files;
int total_passed;
int total_failed;

/*
 * printf-style command templates taken from the command line.  Each is
 * handed to sprintf() with the current file name as its only argument.
 */
char *compress_command;
char *expand_command;

/*
 * Streams used while verifying a single file: the original input, the
 * decompressed TEST.OUT, and the compressed TEST.CMP.
 */
FILE *input;
FILE *output;
FILE *compressed;

/* CHURN.LOG, opened once in main() and written by compress(). */
FILE *log_file;

/*
*  Declarations for global routines.
*/

/* Print the usage text and terminate the program. */
void usage_exit( void );

/* Walk one directory, testing files and recursing into subdirectories. */
void churn_files( char *path );

/* Nonzero if the name carries a well-known compressed-file extension. */
int file_is_already_compressed( char *name );

/* Compress, expand and compare one file; returns nonzero on a match. */
int compress( char *file_name );

/* Close whichever of the per-file streams are currently open. */
void close_all_the_files( void );

/*
*  main() doesn't have to do a whole lot in this program.  It
*  reads in the command line to determine what the root directory
*  to start looking at is, then it initializes the file counts, opens
*  the log file, and records the start time.  It can then call
*  churn_files(), which does all the work, then report on the
*  statistics resulting from churn_files.
*/

void main( int argc, char * argv[] )
{
  time_t start_time;
  time_t stop_time;
  char root_dir[ 81 ];

  if ( argc != 4 )
     usage_exit();
  strcpy( root_dir, argv [  1 ];
  if ( root_dir[ strlen( root_dir ) - 1 ]  != `\\' )
     strcat( root_dir. "\\" );
  compress_command = argv[ 2 ];
  expand_command = argv[ 3 ];

  setbuf( stdout, NULL );
  setbuf( stderr, NULL );
  total_files = 0;
  total_passed = 0;
  total_failed = 0;
  log_file = fopen( "CHURN.LOG", "w" );
  if ( log_file == NULL ) {
    printf( "Couldn't open the log file!\n" );
    exit( 1 );
  }
  fprintf( log_file, "                     "
       "Original Packed\n" );
  fprintf( log_file, "          File Name   "
       " Size Size Ratio  Result\n" );
  fprintf( log_file, "_________________    "
       " _____ _____ ___ ____\n" );
  time( &start_time );
  churn_files( root_dir );
  time( &stop_time );
  fprintf( log_file, "\nTotal elapsed time: %f seconds\n",
       difftime( stop_time, start_time ) );
  fprintf( log_file, "Total files: %d\n", total_files );
  fprintf( log_file, "Total passed: %d\n", total_passed );
  fprintf( log_file, "Total failed: %d\n", total_failed );
}
/*
* churn_files() is a routine that sits in a loop looking at
* files in the directory specified by its single argument, "path".
* As each file is looked at, one of three things happens.  If it
* is a normal file, and has a compressed extension name, like ".ZIP",
* the file is ignored.  If it is a normal file, and doesn't have a
* compressed extension name, it is compressed and decompressed by
* another routine.  Finally, if the file is a subdirectory,
* churn_files() is called recursively with the file name as its
* path argument.  This is one of those rare routines where recursion
* provides a way to truly simplify the task at hand.
*/

void churn_files( char *path )
{
  FILE_INFO file_info;
  int result;
  char full_name[ 81 ];
  strcpy( full_name, path );
  strcat( full_name, "*.*" );
  result = FIND_FIRST( full_name, &file_info );

  while ( result == 0 ) {
    if ( kbhit() ) {
        getch();
        exit(0);
    }
    if ( FILE_IS_DIR( file_info ) ) {
      if ( FILE-NAME( file_info )[ 0 ] != '.' ) {
        strcpy( full_name, path );
        strcat( full_name, FILE_NAME( file-info) );
        strcat( full_name, "\\" );
        churn_files( full_name );
      }
    } else {
      strcpy( full_name, path );
      strcat( full_name, FILE_NAME( file_info ) );
      if ( !file_is_already_compressed( full_name ) ) {
        fprintf( stderr, "Testing %s\n", full_name );
        if ( !compress( full_name ) )
          fprintf( stderr, "Comparison failed!\n );
      }
    }
    result = FIND_NEXT( &file_info );
  }
}

/*
* The job of this routine is simply to check on the file
* whose name is passed as an argument.  The file extension is compared
* against a list of standard extensions that are commonly used on
* compressed files.  If it matches one of these names, we assume it is
* compressed and return a TRUE, otherwise FALSE is returned.
*
* Note that when checking a compression routine for accuracy, it is
* probably a good idea to stub out this routine.  Trying to compress
* "uncompressible" files is a very good exercise for a compression
* program.  It is probably not a good idea when checking compression
* ratios, however.
*/

int file_is_already_compressed( char *name )
{
  /*
   * name may be a full path.  Returns 1 if the final path component has
   * one of the extensions listed below (exact, case-sensitive match),
   * otherwise 0.
   */
  static const char *matches[] = { "ZIP", "ICE", "LZH", "ARC", "GIF", "PAK",
                                   "ARJ", NULL };
  const char *extension;
  int i;

  /*
   * Use the LAST dot: the first one may belong to a directory name such
   * as "C:\FOO.DIR\BAR.ZIP".
   */
  extension = strrchr( name, '.' );
  if ( extension == NULL )
    return( 0 );
  extension++;
  /*
   * A path separator after the dot means the dot was in a directory
   * name and the file itself has no extension.
   */
  if ( strpbrk( extension, "\\/:" ) != NULL )
    return( 0 );
  for ( i = 0 ; matches[ i ] != NULL ; i++ )
    if ( strcmp( extension, matches[ i ] ) == 0 )
      return( 1 );
  return( 0 );
}

/*
* This is the routine that does the majority of the work for
* this program.  It takes a file whose name is passed here.  It first
* compresses, then decompresses that file.  It then compares the file
* to the decompressed output, and reports on the results.
*/

int compress( char *file_name )
{
  /*
   * Runs the compress command and then the expand command on file_name,
   * then compares file_name byte for byte with TEST.OUT.  One line is
   * appended to the log file and the global totals are updated.
   * Returns 1 if the files match, 0 on any failure.
   */
  long new_size;
  long old_size;
  int c;
  char command[ 132 ];

  printf( "%s\n", file_name );
  fprintf( log_file, "%-40s ", file_name );
  total_files++;

  /*
   * sprintf() cannot be bounded, so refuse templates that might not fit.
   * The sum of the two lengths is a safe upper bound on the result,
   * because the file name replaces a two-character "%s".
   */
  if ( strlen( compress_command ) + strlen( file_name ) >= sizeof( command ) ||
       strlen( expand_command ) + strlen( file_name ) >= sizeof( command ) ) {
    total_failed++;
    fprintf( log_file, "Failed, command line too long!\n" );
    return( 0 );
  }

  /*
   * Discard the previous file's results, so that a compressor that fails
   * to run cannot be credited with stale output.
   */
  remove( "TEST.CMP" );
  remove( "TEST.OUT" );

  sprintf( command, compress_command, file_name );
  system( command );
  sprintf( command, expand_command, file_name );
  system( command );

  input = fopen( file_name, "rb" );
  output = fopen( "TEST.OUT", "rb" );
  compressed = fopen( "TEST.CMP", "rb" );

  if ( input == NULL || output == NULL || compressed == NULL ) {
    total_failed++;
    close_all_the_files();
    fprintf( log_file, "Failed, couldn't open file!\n" );
    return( 0 );
  }

  /* File sizes are found by seeking to the end; input is then rewound. */
  fseek( input, 0L, SEEK_END );
  old_size = ftell( input );
  fseek( input, 0L, SEEK_SET );
  fseek( compressed, 0L, SEEK_END );
  new_size = ftell( compressed );

  fprintf( log_file, "%8ld %8ld ", old_size, new_size );
  /* Avoid dividing by zero when the input file is empty. */
  if ( old_size == 0L )
    old_size = 1L;
  fprintf( log_file, "%4ld%% ",
    100L - ( ( 100L * new_size ) / old_size ) );

  /*
   * Compare through EOF inclusive, so that a length mismatch in either
   * direction is reported as a failure.
   */
  do {
    c = getc( input );
    if ( getc( output ) != c ) {
      fprintf( log_file, "Failed\n" );
      total_failed++;
      close_all_the_files();
      return( 0 );
    }
  }
  while ( c != EOF );
  fprintf( log_file, "Passed\n" );
  close_all_the_files();
  total_passed++;
  return( 1 );
}

void close_all_the_files( void )
{
  /*
   * Closes whichever of input, output and compressed are open.  Each
   * pointer is reset to NULL afterwards so that calling this routine a
   * second time cannot close the same stream twice.
   */
  if ( input != NULL ) {
    fclose( input );
    input = NULL;
  }
  if ( output != NULL ) {
    fclose( output );
    output = NULL;
  }
  if ( compressed != NULL ) {
    fclose( compressed );
    compressed = NULL;
  }
}

/*
* This routine is used to print out basic instructions for the use
* of CHURN, and then exit.
*/

void usage_exit( void )
{
  /*
   * Prints the usage text on stdout and exits with status 1; it never
   * returns.  The text goes out through puts(), which does no format
   * processing, so every '%' below appears on screen exactly as written.
   */
  const char *usage =
                "CHURN 1.0. Usage: CHURN root-dir \"compress "
                "command\" \"expand command\"\n"
                "\n"
                "CHURN is used to test compression programs. "
                "It does this by compressing\n"
                "then expanding all of the files in and under "
                "the specified root dir.\n"
                "\n"
                "For each file it finds, CHURN first executes "
                "the compress command to create a\n"
                "compressed file called TEST.CMP.  It then "
                "executes the expand command to\n"
                "create a file called TEST.OUT.  CHURN then "
                "compares the two file to make sure\n"
                "the compression cycle worked properly.\n"
                "\n"
                "The file name to be compressed will be "
                "inserted into the compress command\n"
                "using sprintf, with any %s argument being "
                "substituted with the name of the\n"
                "file being compressed.  Note that the "
                "compress and expand commands should be\n"
                "enclosed in double quotes so that multiple "
                "words can be included in the\n"
                "commands.\n"
                "\n"
                "Note that you may have to double the % "
                "character on your command line to get\n"
                "around argument substitution under some "
                "command processors.  Finally, note that\n"
                "CHURN executes the compression program "
                "using a system() function call, so\n"
                "batch files can be used to execute complex\n"
                "compression sequences.\n"
                "\n"
                "Example:  CHURN C:\\ \"LZSS-C %%s TEST.CMP\" "
                "\"LZSS-E TEST.CMP TEST.OUT\"";
  puts( usage );
  exit( 1 );
}


Table of Contents