[Back to ARCHIVES SWAG index] [Back to Main SWAG index] [Original]
{
Well here it is as promised. This is a Pascal port of Ross
Data compression. This particular unit does no buffer
compression/decompression but you can add it if you want.
The C implementation I did has Buffer to file compression
and file to buffer decompression.
This is a freebie and is availble for SWAG if they
want it.
Common data types unit I use a lot. Looks like Delphi
incorporated similar types.
}
(*
Common data types and structures.
*)
Unit Common;
Interface
Type
PByte = ^Byte;
ByteArray = Array[0..65000] Of Byte;
PByteArray = ^ByteArray;
PInteger = ^Integer;
IntArray = Array[0..32000] Of Integer;
PIntArray = ^IntArray;
PWord = ^Word;
WordArray = Array[0..32000] Of Word;
PWordArray = ^WordArray;
Implementation
END.
(***************************************************
* RDC Unit *
* *
* This is a Pascal port of C code from an article *
* In "The C Users Journal", 1/92 Written by *
* Ed Ross. *
* *
* This particular code has worked well under, *
* Real, Protected and Windows. *
* *
* The compression is not quite as good as PKZIP *
* but it decompresses about 5 times faster. *
***************************************************)
Unit RDCUnit;
Interface
Uses
Common;
Procedure Comp_FileToFile(Var infile, outfile: File);
Procedure Decomp_FileToFile(Var infile, outfile: File);
Implementation
Const
HASH_LEN = 4096; { # hash table entries }
HASH_SIZE = HASH_LEN * Sizeof(word);
BUFF_LEN = 16384; { size of disk io buffer }
(*
compress inbuff_len bytes of inbuff into outbuff
using hash_len entries in hash_tbl.
return length of outbuff, or "0 - inbuff_len"
if inbuff could not be compressed.
*)
Function rdc_compress(ibuff : Pointer;
inbuff_len : Word;
obuff : Pointer;
htable : Pointer) : Integer;
Var
inbuff : PByte Absolute ibuff;
outbuff : PByte Absolute obuff;
hash_tbl : PWordArray Absolute htable;
in_idx : PByte;
in_idxa : PByteArray absolute in_idx;
inbuff_end : PByte;
anchor : PByte;
pat_idx : PByte;
cnt : Word;
gap : Word;
c : Word;
hash : Word;
hashlen : Word;
ctrl_idx : PWord;
ctrl_bits : Word;
ctrl_cnt : Word;
out_idx : PByte;
outbuff_end : PByte;
Begin
in_idx := inbuff;
inbuff_end := Pointer(LongInt(inbuff) + inbuff_len);
ctrl_idx := Pointer(outbuff);
ctrl_cnt := 0;
out_idx := Pointer(longint(outbuff) + Sizeof(Word));
outbuff_end := Pointer(LongInt(outbuff) + (inbuff_len - 48));
{ skip the compression for a small buffer }
If inbuff_len <= 18 Then
Begin
Move(outbuff, inbuff, inbuff_len);
rdc_compress := 0 - inbuff_len;
Exit;
End;
{ adjust # hash entries so hash algorithm can
use 'and' instead of 'mod' }
hashlen := HASH_LEN - 1;
{ scan thru inbuff }
While LongInt(in_idx) < LongInt(inbuff_end) Do
Begin
{ make room for the control bits
and check for outbuff overflow }
If ctrl_cnt = 16 Then
Begin
ctrl_idx^ := ctrl_bits;
ctrl_cnt := 1;
ctrl_idx := Pointer(out_idx);
Inc(word(out_idx), 2);
If LongInt(out_idx) > LongInt(outbuff_end) Then
Begin
Move(outbuff, inbuff, inbuff_len);
rdc_compress := inbuff_len;
Exit;
End;
End
Else
Inc(ctrl_cnt);
{ look for rle }
anchor := in_idx;
c := in_idx^;
Inc(in_idx);
While (LongInt(in_idx) < longint(inbuff_end))
And (in_idx^ = c)
And (LongInt(in_idx) - LongInt(anchor) < (HASH_LEN + 18)) Do
Inc(in_idx);
{ store compression code if character is
repeated more than 2 times }
cnt := LongInt(in_idx) - LongInt(anchor);
If cnt > 2 Then
Begin
If cnt <= 18 Then { short rle }
Begin
out_idx^ := cnt - 3;
Inc(out_idx);
out_idx^ := c;
Inc(out_idx);
End
Else { long rle }
Begin
Dec(cnt, 19);
out_idx^ := 16 + (cnt and $0F);
Inc(out_idx);
out_idx^ := cnt Shr 4;
Inc(out_idx);
out_idx^ := c;
Inc(out_idx);
End;
ctrl_bits := (ctrl_bits Shl 1) Or 1;
Continue;
End;
{ look for pattern if 2 or more characters
remain in the input buffer }
in_idx := anchor;
If (LongInt(inbuff_end) - LongInt(in_idx)) > 2 Then
Begin
{ locate offset of possible pattern
in sliding dictionary }
hash := ((((in_idxa^[0] And 15) Shl 8) Or in_idxa^[1]) Xor
((in_idxa^[0] Shr 4) Or (in_idxa^[2] Shl 4)))
And hashlen;
pat_idx := in_idx;
Word(pat_idx) := hash_tbl^[hash];
hash_tbl^[hash] := Word(in_idx);
{ compare characters if we're within 4098 bytes }
gap := LongInt(in_idx) - LongInt(pat_idx);
If (gap <= HASH_LEN + 2) Then
Begin
While (LongInt(in_idx) < LongInt(inbuff_end))
And (LongInt(pat_idx) < LongInt(anchor))
And (pat_idx^ = in_idx^)
And (LongInt(in_idx) - LongInt(anchor) < 271) Do
Begin
Inc(in_idx);
Inc(pat_idx);
End;
{ store pattern if it is more than 2 characters }
cnt := LongInt(in_idx) - LongInt(anchor);
If cnt > 2 Then
Begin
Dec(gap, 3);
If cnt <= 15 Then { short pattern }
Begin
out_idx^ := (cnt Shl 4) + (gap And $0F);
Inc(out_idx);
out_idx^ := gap Shr 4;
Inc(out_idx);
End
Else { long pattern }
Begin
out_idx^ := 32 + (gap And $0F);
Inc(out_idx);
out_idx^ := gap Shr 4;
Inc(out_idx);
out_idx^ := cnt - 16;
Inc(out_idx);
End;
ctrl_bits := (ctrl_bits Shl 1) Or 1;
Continue;
End;
End;
End;
{ can't compress this character
so copy it to outbuff }
out_idx^ := c;
Inc(out_idx);
Inc(anchor);
in_idx := anchor;
ctrl_bits := ctrl_bits Shl 1;
End;
{ save last load of control bits }
ctrl_bits := ctrl_bits Shl (16 - ctrl_cnt);
ctrl_idx^ := ctrl_bits;
{ and return size of compressed buffer }
rdc_compress := LongInt(out_idx) - LongInt(outbuff);
End;
(*
decompress inbuff_len bytes of inbuff into outbuff.
return length of outbuff.
*)
Function RDC_Decompress(inbuff : PByte;
inbuff_len : Word;
outbuff : PByte) : Integer;
Var
ctrl_bits : Word;
ctrl_mask : Word;
inbuff_idx : PByte;
outbuff_idx : PByte;
inbuff_end : PByte;
cmd, cnt : Word;
ofs, len : Word;
outbuff_src : PByte;
Begin
ctrl_mask := 0;
inbuff_idx := inbuff;
outbuff_idx := outbuff;
inbuff_end := Pointer(LongInt(inbuff) + inbuff_len);
{ process each item in inbuff }
While LongInt(inbuff_idx) < LongInt(inbuff_end) Do
Begin
{ get new load of control bits if needed }
ctrl_mask := ctrl_mask Shr 1;
If ctrl_mask = 0 Then
Begin
ctrl_bits := PWord(inbuff_idx)^;
Inc(inbuff_idx, 2);
ctrl_mask := $8000;
End;
{ just copy this char if control bit is zero }
If (ctrl_bits And ctrl_mask) = 0 Then
Begin
outbuff_idx^ := inbuff_idx^;
Inc(outbuff_idx);
Inc(inbuff_idx);
Continue;
End;
{ undo the compression code }
cmd := (inbuff_idx^ Shr 4) And $0F;
cnt := inbuff_idx^ And $0F;
Inc(inbuff_idx);
Case cmd Of
0 : { short rle }
Begin
Inc(cnt, 3);
FillChar(outbuff_idx^, cnt, inbuff_idx^);
Inc(inbuff_idx);
Inc(outbuff_idx, cnt);
End;
1 : { long rle }
Begin
Inc(cnt, inbuff_idx^ Shl 4);
Inc(inbuff_idx);
Inc(cnt, 19);
FillChar(outbuff_idx^, cnt, inbuff_idx^);
Inc(inbuff_idx);
Inc(outbuff_idx, cnt);
End;
2 : { long pattern }
Begin
ofs := cnt + 3;
Inc(ofs, inbuff_idx^ Shl 4);
Inc(inbuff_idx);
cnt := inbuff_idx^;
Inc(inbuff_idx);
Inc(cnt, 16);
outbuff_src := Pointer(LongInt(outbuff_idx) - ofs);
Move(outbuff_src^, outbuff_idx^, cnt);
Inc(outbuff_idx, cnt);
End;
Else { short pattern}
Begin
ofs := cnt + 3;
Inc(ofs, inbuff_idx^ Shl 4);
Inc(inbuff_idx);
outbuff_src := Pointer(LongInt(outbuff_idx) - ofs);
Move(outbuff_src^, outbuff_idx^, cmd);
Inc(outbuff_idx, cmd);
End;
End;
End;
{ return length of decompressed buffer }
RDC_Decompress := LongInt(outbuff_idx) - LongInt(outbuff);
End;
Procedure Comp_FileToFile(Var infile, outfile: File);
Var
code : Integer;
bytes_read : Integer;
compress_len : Integer;
HashPtr : PWordArray;
inputbuffer,
outputbuffer : PByteArray;
Begin
Getmem(HashPtr, HASH_SIZE);
Fillchar(hashPtr^, HASH_SIZE, #0);
Getmem(inputbuffer, BUFF_LEN);
Getmem(outputbuffer, BUFF_LEN);
{ read infile BUFF_LEN bytes at a time }
bytes_read := BUFF_LEN;
While bytes_read = BUFF_LEN Do
Begin
Blockread(infile, inputbuffer^, BUFF_LEN, bytes_read);
{ compress this load of bytes }
compress_len := RDC_Compress(PByte(inputbuffer), bytes_read,
PByte(outputbuffer), HashPtr);
{ write length of compressed buffer }
Blockwrite(outfile, compress_len, 2, code);
{ check for negative length indicating the buffer could not be compressed }
If compress_len < 0 Then
compress_len := 0 - compress_len;
{ write the buffer }
Blockwrite(outfile, outputbuffer^, compress_len, code);
{ we're done if less than full buffer was read }
End;
{ add trailer to indicate End of File }
compress_len := 0;
Blockwrite(outfile, compress_len, 2, code);
{
If (code <> 2) then
err_exit('Error writing trailer.'+#13+#10);
}
Freemem(HashPtr, HASH_SIZE);
Freemem(inputbuffer, BUFF_LEN);
Freemem(outputbuffer, BUFF_LEN);
End;
Procedure Decomp_FileToFile(Var infile, outfile: File);
Var
code : Integer;
block_len : Integer;
decomp_len : Integer;
HashPtr : PWordArray;
inputbuffer,
outputbuffer : PByteArray;
Begin
Getmem(inputbuffer, BUFF_LEN);
Getmem(outputbuffer, BUFF_LEN);
{ read infile BUFF_LEN bytes at a time }
block_len := 1;
While block_len <> 0 do
Begin
Blockread(infile, block_len, 2, code);
{
If (code <> 2) then
err_exit('Can''t read block length.'+#13+#10);
}
{ check for End-of-file flag }
If block_len <> 0 Then
Begin
If (block_len < 0) Then { copy uncompressed chars }
Begin
decomp_len := 0 - block_len;
Blockread(infile, outputbuffer^, decomp_len, code);
{
If code <> decomp_len) then
err_exit('Can''t read uncompressed block.'+#13+#10);
}
End
Else { decompress this buffer }
Begin
Blockread(infile, inputbuffer^, block_len, code);
{
If (code <> block_len) then
err_exit('Can''t read compressed block.'+#13+#10);
}
decomp_len := RDC_Decompress(PByte(inputbuffer), block_len,
PByte(outputbuffer));
End;
{ and write this buffer outfile }
Blockwrite(outfile, outputbuffer^, decomp_len, code);
{
if (code <> decomp_len) then
err_exit('Error writing uncompressed data.'+#13+#10);
}
End;
End;
Freemem(inputbuffer, BUFF_LEN);
Freemem(outputbuffer, BUFF_LEN);
End;
END.
<------------------- CUT ------------------------->
Here is the test program I used to test this. You will
have to change it to reflect other file names but it
will give you an idea of how to use the unit.
<------------------- CUT ------------------------->
Program RDCTest;
Uses
RDCUnit;
Var
fin, fout : File;
a : Array[0..50] Of Byte;
BEGIN
{
Assign(fin, 'ASMINTRO.TXT');
Reset(fin, 1);
Assign(fout, 'ASMINTRO.RDC');
Rewrite(fout, 1);
Comp_FileToFile(fin, fout);
}
Assign(fin, 'ASMINTRO.RDC');
Reset(fin, 1);
Assign(fout, 'ASMINTRO.2');
Rewrite(fout, 1);
Decomp_FileToFile(fin, fout);
Close(fin);
Close(fout);
END.
[Back to ARCHIVES SWAG index] [Back to Main SWAG index] [Original]