[Back to DATATYPE SWAG index]  [Back to Main SWAG index]  [Original]

{
I want to use LZH compression for a backup module in one of my programs. I
found a great working source code. I'll post it here ... only problem I have
is that it's kinda slow ... I need to compress a file of 4 Mb ... this file
contains a lot of empty space. I know this routine could be speeded up a LOT.
Here's how ... (I didn't come up with the idea)

      bytes (i.e. a file full of blanks, or nuls).  I believe
      this would be improved by preceding the encoding with
      run length compression, using 90h as the encodeing signal,
      so that <char> 90h nn (with 2 <= nn <= 255) represents
      <char> followed by nn repetitions, i.e. at least a total
      of nn+1 occurences of <char>.  <90h 0> would represent 90h
      itself, and 90h cannot be run length encoded.  <90h 1>
      would represent EOF, thus embedding a specific EOF marker
      in the file.  This allows use where the actual file length
      is unknown before it is reached, i.e. in communications.

See, this guy says it's possible, now it's up to you guys to do it, I'm not
good experienced enough to come up with it myself.
Hope you can help, in the next 3 messages you'll find the LZH code.
}


{$A+,B-,D+,E+,F-,I-,L+,N+,O-,R-,S-,V-}
{$M 16384,0,655360}
program LZH_Test;
uses
  LZH;
type
  IObuf = array[0..10*1024-1] of byte;
var
  infile,outfile: file;
  ibuf,obuf: IObuf;
  s: String;

  procedure Error (msg: String);
  begin
    writeln(msg);
    HALT(1)
  end;

{$F+}
  procedure ReadNextBlock;
{$F-}
  begin
    inptr:= 0;
    BlockRead(infile,inbuf^,sizeof(ibuf),inend);
    if IoResult>0 then Error('! Error reading input file');
  end;

{$F+}
  procedure WriteNextBlock;
{$F-}
  var
    wr: word;
  begin
    BlockWrite(outfile,outbuf^,outptr,wr);
    if (IoResult>0) or (wr<outptr) then
      Error('! Error writing output file');
    outptr:= 0
  end;

  procedure OpenInput (fn: String);
  begin
    assign(infile,fn); reset(infile,1);
    if IoResult>0 then Error('! Can''t open input file');
    inbuf:= @ibuf;
    ReadToBuffer:= ReadNextBlock;
    ReadToBuffer;
  end;

  procedure OpenOutput (fn: String);
  begin
    assign(outfile,fn); rewrite(outfile,1);
    if IoResult>0 then Error('! Can''t open output file');
    outbuf:= @obuf;
    outend:= sizeof(obuf);
    outptr:= 0;
    WriteFromBuffer:= WriteNextBlock;
  end;

begin {main}
   if ParamCount<>3 then begin
     writeln('Usage: lzhuf e(compression)|d(uncompression) infile outfile');
     HALT(1)
   end;
   OpenInput(ParamStr(2));
   OpenOutput(ParamStr(3));
   s:= ParamStr(1);
   case s[1] of
     'e','E': Encode(filesize(infile));
     'd','D': Decode
   else
     Error('! Use [D] for Decompression or [E] for Compression')
   end;
   close(infile); if IoResult>0 then Error('! Error closing input file');
   if outptr>0 then WriteNextBlock;
   close(outfile); if IoResult>0 then Error('! Error closing output file');
end.


{ LZHUF.C English version 1.0
  Based on Japanese version 29-NOV-1988
  LZSS coded by Haruhiko OKUMURA
  Adaptive Huffman Coding coded by Haruyasu YOSHIZAKI
  Edited and translated to English by Kenji RIKITAKE
  Converted to Turbo Pascal 5.0
    by Peter Sawatzki with assistance of Wayne Sullivan
}
{$i-,r-,v-,s-}
Unit LZH;
Interface
type
  bufar = array[0..0] of byte; {will be overindexed}
var
  WriteFromBuffer,
  ReadToBuffer: procedure;
  inbuf,outbuf: ^bufar;
  inptr,inend,outptr,outend: word;

  procedure Encode (bytes: LongInt);
  procedure Decode;

Implementation
Const
{-LZSS Parameters}
  N         = 4096; {Size of string buffer}
  F         = 60;   {60 Size of look-ahead buffer}
  THRESHOLD = 2;
  NODENIL   = N;    {End of tree's node}

{-Huffman coding parameters}
  N_CHAR    = 256-THRESHOLD+F;
                            {character code (= 0..N_CHAR-1)}
  T         = N_CHAR*2 -1;  {Size of table}
  R         = T-1;          {root position}
  MAX_FREQ  = $8000; {update when cumulative frequency reaches to this value}

{-Tables for encoding/decoding upper 6 bits of sliding dictionary pointer}
{-encoder table}
p_len: array[0..63] of byte =
       ($03,$04,$04,$04,$05,$05,$05,$05,$05,$05,$05,$05,$06,$06,$06,$06,
        $06,$06,$06,$06,$06,$06,$06,$06,$07,$07,$07,$07,$07,$07,$07,$07,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08);

p_code: array[0..63] of byte =
       ($00,$20,$30,$40,$50,$58,$60,$68,$70,$78,$80,$88,$90,$94,$98,$9C,
        $A0,$A4,$A8,$AC,$B0,$B4,$B8,$BC,$C0,$C2,$C4,$C6,$C8,$CA,$CC,$CE,
        $D0,$D2,$D4,$D6,$D8,$DA,$DC,$DE,$E0,$E2,$E4,$E6,$E8,$EA,$EC,$EE,
        $F0,$F1,$F2,$F3,$F4,$F5,$F6,$F7,$F8,$F9,$FA,$FB,$FC,$FD,$FE,$FF);

{-decoder table}
d_code: array[0..255] of byte =
       ($00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
        $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,
        $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,
        $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,
        $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,
        $04,$04,$04,$04,$04,$04,$04,$04,$05,$05,$05,$05,$05,$05,$05,$05,
        $06,$06,$06,$06,$06,$06,$06,$06,$07,$07,$07,$07,$07,$07,$07,$07,
        $08,$08,$08,$08,$08,$08,$08,$08,$09,$09,$09,$09,$09,$09,$09,$09,
        $0A,$0A,$0A,$0A,$0A,$0A,$0A,$0A,$0B,$0B,$0B,$0B,$0B,$0B,$0B,$0B,
        $0C,$0C,$0C,$0C,$0D,$0D,$0D,$0D,$0E,$0E,$0E,$0E,$0F,$0F,$0F,$0F,
        $10,$10,$10,$10,$11,$11,$11,$11,$12,$12,$12,$12,$13,$13,$13,$13,
        $14,$14,$14,$14,$15,$15,$15,$15,$16,$16,$16,$16,$17,$17,$17,$17,
        $18,$18,$19,$19,$1A,$1A,$1B,$1B,$1C,$1C,$1D,$1D,$1E,$1E,$1F,$1F,
        $20,$20,$21,$21,$22,$22,$23,$23,$24,$24,$25,$25,$26,$26,$27,$27,
        $28,$28,$29,$29,$2A,$2A,$2B,$2B,$2C,$2C,$2D,$2D,$2E,$2E,$2F,$2F,
        $30,$31,$32,$33,$34,$35,$36,$37,$38,$39,$3A,$3B,$3C,$3D,$3E,$3F);

d_len: array[0..255] of byte =
       ($03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,
        $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,
        $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,
        $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,
        $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,
        $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,
        $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,
        $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,
        $08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08,$08);

  getbuf: word = 0;
  getlen: byte = 0;
  putbuf: word = 0;
  putlen: word = 0;

  textsize: LongInt = 0;
  codesize: LongInt = 0;
  printcount: LongInt = 0;

var
  text_buf: array[0..N + F - 2] of byte;
  match_position, match_length: word;
  lson,dad: array[0..N] of word;
  rson:     array[0..N + 256] of word;

  freq: array[0..T] of word; {cumulative freq table}

{-pointing parent nodes. area [T..(T + N_CHAR - 1)] are pointers for leaves}
  prnt: array [0..T+N_CHAR-1] of word;

{-pointing children nodes (son[], son[] + 1)}
  son: array[0..T-1] of word;

  function getc: byte;
  begin
    getc:= inbuf^[inptr];
    Inc(inptr);
    if inptr=inend then ReadToBuffer
  end;

  procedure putc (c: byte);
  begin
    outbuf^[outptr]:= c;
    Inc(outptr);
    if outptr=outend then
      WriteFromBuffer
  end;

procedure InitTree;
{-Initializing tree}
var
  i: word;
begin
  for i:= N+1 to N+256 do rson[i] := NODENIL; {root}
  for i:= 0 to N-1 do     dad[i]  := NODENIL; {node}
end;

procedure InsertNode (r: word);
{-Inserting node to the tree}
Label
  Done;
var
  i,p: word;
  geq: boolean;
  c: word;
begin
  geq:= true;
  p:= N+1+text_buf[r];
  rson[r]:= NODENIL;
  lson[r]:= NODENIL;
  match_length := 0;
  while TRUE do begin
    if geq then
      if rson[p]=NODENIL then begin
        rson[p]:= r;
        dad[r] := p;
        exit
      end else
        p:= rson[p]
    else
      if lson[p]=NODENIL then begin
        lson[p]:= r;
        dad[r] := p;
        exit
      end else
        p:= lson[p];
    i:= 1;
    while (i<F) AND (text_buf[r+i]=text_buf[p+i]) do Inc(i);
    geq:= (text_buf[r+i]>=text_buf[p+i]) or (i=F);

    if i>THRESHOLD then begin
      if i>match_length then begin
        match_position := (r-p) AND (N-1) -1;
        match_length:= i;
        if match_length>=F then goto done;
      end;
      if i=match_length then begin
        c:= (r-p) AND (N-1) -1;
        if c<match_position then match_position:= c
      end
    end
  end;
  Done:
  dad[r]:= dad[p];
  lson[r]:= lson[p];
  rson[r]:= rson[p];
  dad[lson[p]]:= r;
  dad[rson[p]]:= r;
  if rson[dad[p]]=p then
    rson[dad[p]]:= r
  else
    lson[dad[p]]:= r;
  dad[p]:= NODENIL; {remove p}
end;

procedure DeleteNode (p: word);
{-Delete node from the tree}
var
  q: word;
begin
  if dad[p] =NODENIL then exit; {unregistered}
  if rson[p]=NODENIL then q:= lson[p] else
  if lson[p]=NODENIL then q:= rson[p] else begin
    q:= lson[p];
    if rson[q]<>NODENIL then begin
      repeat
        q:= rson[q];
      until rson[q]=NODENIL;
      rson[dad[q]]:= lson[q];
      dad[lson[q]]:= dad[q];
      lson[q]:= lson[p];
      dad[lson[p]]:= q;
    end;
    rson[q]:= rson[p];
    dad[rson[p]]:= q;
  end;
  dad[q]:= dad[p];
  if rson[dad[p]]=p then
    rson[dad[p]]:= q
  else
    lson[dad[p]]:= q;
  dad[p]:= NODENIL;
end;

function GetBit: byte;
{-get one bit}
begin
  while getlen<=8 do begin
    getbuf:= getbuf OR (WORD(getc) SHL (8-getlen));
    Inc(getlen,8);
  end;
  GetBit:= getbuf SHR 15;
  {if (getbuf AND $8000)>0 then GetBit:= 1 else GetBit:= 0;}
  getbuf:= getbuf SHL 1;
  Dec(getlen);
end;

function GetByte: Byte;
{-get a byte}
begin
  while getlen<=8 do begin
    getbuf:= getbuf OR (WORD(getc) SHL (8 - getlen));
    Inc(getlen,8);
  end;
  GetByte:= Hi(getbuf);
  getbuf:= getbuf SHL 8;
  Dec(getlen,8);
end;

procedure Putcode (l: byte; c: word);
{-output l bits}
begin
  putbuf:= putbuf OR (c SHR putlen);
  Inc(putlen,l);
  if putlen>=8 then begin
    putc(Hi(putbuf));
    Dec(putlen,8);
    if putlen>=8 then begin
      putc(Lo(putbuf));
      Inc(codesize,2);
      Dec(putlen,8);
      putbuf:= c SHL (l-putlen);
    end else begin
      putbuf:= Swap(putbuf AND $FF); {SHL 8;}
      Inc(codesize);
    end
  end
end;

procedure StartHuff;
{-initialize freq tree}
var
  i,j: word;
begin
  for i:= 0 to N_CHAR-1 do begin
    freq[i]:= 1;
    son[i] := i+T;
    prnt[i+T]:= i
  end;
  i:= 0; j:= N_CHAR;
  while j<=R do begin
    freq[j]:= freq[i]+freq[i+1];
    son[j] := i;
    prnt[i]:= j;
    prnt[i+1]:= j;
    Inc(i,2); Inc(j)
  end;
  freq[T]:= $FFFF;
  prnt[R]:= 0;
end;



procedure reconst;
{-reconstruct freq tree }
var
  i,j,k,f,l: word;
begin
  {-halven cumulative freq for leaf nodes}
  j:= 0;
  for i:= 0 to T-1 do
    if son[i]>=T then begin
      freq[j]:= (freq[i]+1) SHR 1;
      son[j] := son[i];
      Inc(j)
    end;
  {-make a tree : first, connect children nodes}
  i:= 0; j:= N_CHAR;
  while j<T do begin
    k:= i+1;
    f:= freq[i]+freq[k];
    freq[j]:= f;
    k:= j-1;
    while f<freq[k] do Dec(k);
    Inc(k);
    l:= (j-k)*2;

    move(freq[k],freq[k+1],l);
    freq[k]:= f;
    move(son[k],son[k+1],l);
    son[k]:= i;
    Inc(i,2);
    Inc(j)
  end;
  {-connect parent nodes}
  for i:= 0 to T-1 do begin
    k:= son[i];
    prnt[k]:= i;
    if k<T then
      prnt[k+1]:= i
  end
end;

procedure update(c: word);
{-update freq tree}
var
  i,j,k,l: word;
begin
  if freq[R]=MAX_FREQ then reconst;
  c:= prnt[c+T];
  repeat
    Inc(freq[c]);
    k:= freq[c];
    {-swap nodes to keep the tree freq-ordered}
    l:= c+1;
    if k>freq[l] then begin
      while k>freq[l+1] do Inc(l);
      freq[c]:= freq[l];
      freq[l]:= k;

      i:= son[c];
      prnt[i]:= l;
      if i<T then prnt[i+1]:= l;

      j:= son[l];
      son[l]:= i;

      prnt[j]:= c;
      if j<T  then prnt[j+1]:= c;
      son[c]:= j;

      c := l;
    end;
    c:= prnt[c]
  until c=0; {do it until reaching the root}
end;

procedure EncodeChar (c: word);
var
  code,len,k: word;
begin
  code:= 0;
  len:= 0;
  k:= prnt[c+T];

  {-search connections from leaf node to the root}
  repeat
    code:= code SHR 1;
    {-if node's address is odd, output 1 else output 0}
    if (k AND 1)>0 then Inc(code,$8000);
    Inc(len);
    k:= prnt[k];
  until k=R;
  Putcode(len,code);
  update(c)
end;

procedure EncodePosition(c: word);
var
  i: word;
begin
  {-output upper 6 bits with encoding}
  i:= c SHR 6;
  Putcode(p_len[i], WORD(p_code[i]) SHL 8);
  {-output lower 6 bits directly}
  Putcode(6, (c AND $3F) SHL 10);
end;

procedure EncodeEnd;
begin
  if putlen>0 then begin
    putc(Hi(putbuf));
    Inc(codesize)
  end
end;

function DecodeChar: word;
var
  c: word;
begin
  c:= son[R];
  {-start searching tree from the root to leaves.
    choose node #(son[]) if input bit = 0
    else choose #(son[]+1) (input bit = 1)}
  while c<T do c:= son[c+GetBit];
  Dec(c,T);
  update(c);
  DecodeChar:= c
end;

function DecodePosition: word;
var
  i,j,c: word;
begin
  {-decode upper 6 bits from given table}
  i:= GetByte;
  c:= WORD(d_code[i]) SHL 6;
  j:= d_len[i];
  {-input lower 6 bits directly}
  Dec(j,2);
  while j>0 do begin
    Dec(j);
    i:= (i SHL 1) OR GetBit;
  end;
  DecodePosition:= c OR (i AND $3F);
end;

{-Compression }
procedure Encode (bytes: LongInt);
{-Encoding/Compressing}
type
  ByteRec = record
              b0,b1,b2,b3: byte
            end;
var
  i,c,len,r,s,last_match_length: word;
begin
  {-write size of original text}
  with ByteRec(Bytes) do begin
    putc(b0);
    putc(b1);
    putc(b2);
    putc(b3)
  end;
  if bytes=0 then exit;
  textsize:= 0;
  StartHuff;
  InitTree;
  s:= 0;
  r:= N-F;
  fillchar(text_buf[0],r,' ');
  len:= 0;
  while (len<F) AND (inptr OR inend>0) do begin
    text_buf[r+len]:= getc;
    Inc(len)
  end;
  textsize := len;
  for i:= 1 to F do InsertNode(r - i);
  InsertNode(r);
  repeat
    if match_length>len then match_length:= len;
    if match_length<=THRESHOLD then begin
      match_length := 1;
      EncodeChar(text_buf[r])
    end else begin
      EncodeChar(255 - THRESHOLD + match_length);
      EncodePosition(match_position)
    end;
    last_match_length := match_length;
    i:= 0;
    while (i<last_match_length) AND (inptr OR inend>0) do begin
      Inc(i);
      DeleteNode(s);
      c:= getc;
      text_buf[s]:= c;
      if s<F-1 then text_buf[s+N]:= c;
      s:= (s+1) AND (N-1);
      r:= (r+1) AND (N-1);
      InsertNode(r);
    end;
    Inc(textsize,i);
    if textsize>printcount then begin
      write(textsize,#13);
      Inc(printcount,1024)
    end;
    while i<last_match_length do begin
      Inc(i);
      DeleteNode(s);
      s := (s+1) AND (N-1);
      r := (r+1) AND (N-1);
      Dec(len);
      if len>0 then InsertNode(r)
    end;
  until len=0;
  EncodeEnd;
  writeln('input:  ',textsize,' bytes');
  writeln('output: ',codesize,' bytes');
  writeln('compression: ',textsize*100 DIV codesize,'%');
end;

procedure Decode;
{-Decoding/Uncompressing}
type
  ByteRec = Record
              b0,b1,b2,b3: byte
            end;
var
  i,j,k,r,c: word;
  count: LongInt;
begin
  {-read size of original text}
  with ByteRec(textsize) do begin
    b0:= getc;
    b1:= getc;
    b2:= getc;
    b3:= getc
  end;
  if textsize=0 then exit;
  StartHuff;
  fillchar(text_buf[0],N-F,' ');
  r:= N-F;
  count:= 0;
  while count<textsize do begin
    c:= DecodeChar;
    if c<256 then begin
      putc(c);
      text_buf[r]:= c;
      r:= (r+1) AND (N-1);
      Inc(count)
    end else begin
      i:= (r-DecodePosition-1) AND (N-1);
      j:= c-255+THRESHOLD;
      for k:= 0 to j-1 do begin
        c:= text_buf[(i+k) AND (N-1)];
        putc(c);
        text_buf[r]:= c;
        r:= (r+1) AND (N-1);
        Inc(count)
      end;
    end;
    if count>printcount then begin
      write(count,#13);
      Inc(printcount,1024)
    end
  end;
  writeln(count);
end;

end.


[Back to DATATYPE SWAG index]  [Back to Main SWAG index]  [Original]