[Back to SORTING SWAG index] [Back to Main SWAG index] [Original]
(* Start of PART 1 of 7 *)
(***********************************************************************
Contest 3 Entry : Anagram Sort by Guy McLoughlin
Compiler : Borland Pascal 7.0
***********************************************************************)
{.$DEFINE DebugMode}
{$IFDEF DebugMode}
{$A+,B-,D+,E-,F-,G+,I+,L+,N-,O-,P-,Q+,R+,S+,T+,V+,X-}
{$ELSE}
{$A+,B-,D-,E-,F-,G+,I-,L-,N-,O-,P-,Q-,R-,S+,T-,V-,X-}
{$endIF}
{$M 16384,374784,655360}
(* Program header and global declarations for the anagram sorter. *)
Program Anagram_Sort;
Const
(* Highest index of the anagram-Word and anagram-group tables. *)
co_MaxWord = 2500;
(* Size in Bytes of the data-buffer (see byar_Buffer below). *)
co_MaxSize = 65519;
(* Fill level at which the output loop stops adding Strings to the *)
(* data-buffer and Writes it to disk. *)
co_SafeSize = 64500;
Type
(* Packed letter-count key of a Word. PackBits fills elements *)
(* 1..10 only; elements 11..12 are never written by it. *)
Char_12 = Array[1..12] of Char;
(* Holds the separator text (comma, space, Cr, Lf). *)
st_4 = String[4];
(* Text of one anagram-Word. *)
st_10 = String[10];
(* Text of one output line (an anagram-group String). *)
st_80 = String[80];
(* One counter per letter, indexed by Character code 97..122 *)
(* ('a'..'z'). *)
byar_26 = Array[97..122] of Byte;
(* Pointer to the data-buffer used for File input and output. *)
po_Buff = ^byar_Buffer;
byar_Buffer = Array[1..co_MaxSize] of Byte;
porc_Word = ^rc_Word;
(* One anagram-Word: *)
(* wo_Pos - position of the Word in the alphabetically *)
(* sorted table. *)
(* ar_LtrChk - packed letter counts produced by PackBits. *)
(* st_Word - the Word itself. *)
rc_Word = Record
wo_Pos : Word;
ar_LtrChk : Char_12;
st_Word : st_10
end;
(* Table of Pointers to anagram-Word Records. The main Program *)
(* only uses indexes 1..co_MaxWord. *)
poar_Word = Array[0..co_MaxWord] of porc_Word;
porc_AnaGroup = ^rc_AnaGroup;
(* One output line: *)
(* wo_Pos - wo_Pos of the Word that starts the line. *)
(* st_Group - the text of the line. *)
rc_AnaGroup = Record
wo_Pos : Word;
st_Group : st_80
end;
(* Table of Pointers to anagram-group Records. *)
poar_AnaGroup = Array[0..co_MaxWord] of porc_AnaGroup;
(* Untyped view of either table; the tables are typecast to this *)
(* Type when they are passed to QuickSort. *)
poar_Generic = Array[0..co_MaxWord] of Pointer;
(***** Check For I/O errors. *)
(* *)
Procedure CheckIOerror;
(* Reads the pending I/O status. When it is non-zero the error code *)
(* is displayed and the Program is halted; otherwise the routine *)
(* simply returns. *)
Var
  by_Status : Byte;
begin
  (* Fetch (and thereby clear) the pending I/O result. *)
  by_Status := ioresult;
  (* Nothing to report. *)
  if (by_Status = 0) then
    exit;
  Writeln('Input/Output error = ', by_Status);
  halt
end; (* CheckIOerror. *)
(***** Display HEAP error message. *)
(* *)
Procedure HeapError;
(* Tells the user that a HEAP allocation cannot be satisfied and *)
(* halts the Program. Never returns to the caller. *)
begin
  (* Spelling of the message corrected ("Insufficient"). *)
  Writeln('Insufficient free HEAP memory');
  halt
end; (* HeapError. *)
Type
(* Element handled by QuickSort: an untyped Pointer. *)
Item = Pointer;
(* Array Type that QuickSort sorts in place. *)
ar_Item = poar_Generic;
(* Comparison callback used by QuickSort. QuickSort advances past *)
(* an element while the callback returns True for (element, pivot) *)
(* on the left side, or for (pivot, element) on the right side, so *)
(* the callback acts as a strict "Item1 sorts before Item2" test. *)
CompFunc = Function(Var Item1, Item2 : Item) : Boolean;
(* end of PART 1 of 7 *)
(* Start of PART 2 of 7 *)
(***** QuickSort routine. *)
(* *)
Procedure QuickSort({update} Var ar_Data  : ar_Item;
                    {input }     wo_Left,
                                 wo_Right : Word;
                                 LessThan : CompFunc);
(* Recursive in-place QuickSort of ar_Data[wo_Left..wo_Right].       *)
(*                                                                   *)
(*   ar_Data  - Array of Pointers; only the Pointers are exchanged.  *)
(*   wo_Left  - index of the first element of the range.             *)
(*   wo_Right - index of the last element of the range.              *)
(*   LessThan - strict ordering test; must return True only when     *)
(*              its first argument sorts before its second.          *)
(*                                                                   *)
(* A range holding fewer than two elements is left untouched, so     *)
(* the routine can safely be called for an empty table (for example  *)
(* wo_Left = 1, wo_Right = 0).                                       *)
Var
  Pivot,
  TempItem : Item;
  (* The scan indexes are signed and wider than Word so that         *)
  (* stepping below index 0 cannot wrap around to 65535.             *)
  li_Low,
  li_High  : LongInt;
begin
  (* Nothing to sort; this also stops the pivot being read from a    *)
  (* slot outside the requested range.                               *)
  if (wo_Left >= wo_Right) then
    exit;
  li_Low  := wo_Left;
  li_High := wo_Right;
  (* Use the middle element as pivot. The Pointer is copied, so the  *)
  (* exchanges below cannot change the pivot.                        *)
  Pivot := ar_Data[(LongInt(wo_Left) + wo_Right) div 2];
  Repeat
    (* Skip elements that are already on the correct side.           *)
    While LessThan(ar_Data[li_Low], Pivot) do
      inc(li_Low);
    While LessThan(Pivot, ar_Data[li_High]) do
      dec(li_High);
    (* Exchange the two out-of-place elements and step past them.    *)
    if (li_Low <= li_High) then
    begin
      TempItem         := ar_Data[li_Low];
      ar_Data[li_Low]  := ar_Data[li_High];
      ar_Data[li_High] := TempItem;
      inc(li_Low);
      dec(li_High)
    end
  Until (li_Low > li_High);
  (* Sort both partitions. The tests guarantee that the values       *)
  (* converted back to Word lie inside wo_Left..wo_Right.            *)
  if (wo_Left < li_High) then
    QuickSort(ar_Data, wo_Left, Word(li_High), LessThan);
  if (li_Low < wo_Right) then
    QuickSort(ar_Data, Word(li_Low), wo_Right, LessThan)
end; (* QuickSort. *)
(***** Sort Function to check if anagram-Word's are in sorted order *)
(* *)
Function AlphaSort(Var Item1, Item2 : Item) : Boolean; Far;
(* QuickSort callback: True when the Word text of Item1 sorts       *)
(* before the Word text of Item2. Both Items are treated as         *)
(* Pointers to rc_Word Records.                                     *)
Var
  po_First,
  po_Second : porc_Word;
begin
  po_First  := porc_Word(Item1);
  po_Second := porc_Word(Item2);
  AlphaSort := (po_Second^.st_Word > po_First^.st_Word)
end; (* AlphaSort. *)
(***** Sort Function to check: *)
(* *)
(* 1 - If anagram-Words are sorted by length. *)
(* 2 - If anagram-Words are sorted by anagram-group. *)
(* 3 - If anagram-Words are sorted alphabetically. *)
(* *)
Function Sort1(Var Item1, Item2 : Item) : Boolean; Far;
(* QuickSort callback with three keys, tested in this order:        *)
(*   1 - length of the Word,                                        *)
(*   2 - packed letter counts (ar_LtrChk),                          *)
(*   3 - recorded position (wo_Pos).                                *)
(* Returns True when Item1 sorts before Item2. Both Items are       *)
(* treated as Pointers to rc_Word Records.                          *)
Var
  po_First,
  po_Second : porc_Word;
begin
  po_First  := porc_Word(Item1);
  po_Second := porc_Word(Item2);
  (* Key 1: a different length decides immediately.                 *)
  if (length(po_First^.st_Word) <> length(po_Second^.st_Word)) then
  begin
    Sort1 := (length(po_First^.st_Word) < length(po_Second^.st_Word));
    exit
  end;
  (* Key 2: different letter counts decide next.                    *)
  if (po_First^.ar_LtrChk <> po_Second^.ar_LtrChk) then
  begin
    Sort1 := (po_First^.ar_LtrChk < po_Second^.ar_LtrChk);
    exit
  end;
  (* Key 3: same length and letters, fall back to the position.     *)
  Sort1 := (po_First^.wo_Pos < po_Second^.wo_Pos)
end; (* Sort1. *)
(***** Sort Function to check: *)
(* *)
(* If anagram-group Strings are sorted alphabetically. *)
(* *)
Function Sort2(Var Item1, Item2 : Item) : Boolean; Far;
(* QuickSort callback: True when the wo_Pos field of Item1 is lower *)
(* than the wo_Pos field of Item2. Both Items are treated as        *)
(* Pointers to rc_AnaGroup Records.                                 *)
Var
  wo_First,
  wo_Second : Word;
begin
  wo_First  := porc_AnaGroup(Item1)^.wo_Pos;
  wo_Second := porc_AnaGroup(Item2)^.wo_Pos;
  Sort2 := (wo_Second > wo_First)
end; (* Sort2. *)
(* end of PART 2 of 7 *)
(* Start of PART 3 of 7 *)
(***** Check if the anagram-Word table is in sorted order. *)
(* *)
Function TableSorted({input } Var ar_Data  : poar_Word;
                                  wo_Left,
                                  wo_Right : Word) : {output} Boolean;
(* Reports whether the Words in ar_Data[wo_Left..wo_Right] are in   *)
(* ascending order of their st_Word text.                           *)
(*                                                                  *)
(* Returns True for a sorted range, and also for a range holding    *)
(* fewer than two Words (including wo_Right < wo_Left); returns     *)
(* False as soon as a Word is found that is greater than its        *)
(* successor.                                                       *)
Var
  wo_Index : Word;
begin
  (* Assume the range is sorted until proven otherwise.             *)
  TableSorted := True;
  (* Only compare when there are at least two Words. Without this   *)
  (* test an empty table (wo_Right = 0) would make the loop limit   *)
  (* wrap around and the scan would run far beyond the Array.       *)
  if (wo_Right > wo_Left) then
    (* Compare every Word with the Word in front of it.             *)
    For wo_Index := succ(wo_Left) to wo_Right do
      if (ar_Data[pred(wo_Index)]^.st_Word >
          ar_Data[wo_Index]^.st_Word) then
      begin
        (* Out of order: report it and stop looking.                *)
        TableSorted := False;
        break
      end
end; (* TableSorted. *)
(***** Pack bits 0,1,2 of each Byte in 26 Byte Array into 10 Chars. *)
(* *)
Procedure PackBits({input } Var byar_Temp : byar_26;
                   {output} Var Char_Temp : Char_12);
(* Packs the low 3 bits of each of the 26 counters in byar_Temp     *)
(* into Char_Temp[1..10], most significant bits first: the counter  *)
(* at index 97 ends up in the top 3 bits of Char_Temp[1], and the   *)
(* counter at index 122 in bits 4..2 of Char_Temp[10]. The low two  *)
(* bits of Char_Temp[10] are zero. Char_Temp[11] and Char_Temp[12]  *)
(* are not written.                                                 *)
Var
  wo_Bits   : Word;  (* Bits collected but not yet stored.          *)
  by_Count  : Byte;  (* Number of valid bits held in wo_Bits.       *)
  by_Out    : Byte;  (* Index of the last Char_Temp element stored. *)
  by_Letter : Byte;  (* Index of the counter being packed.          *)
begin
  wo_Bits  := 0;
  by_Count := 0;
  by_Out   := 0;
  For by_Letter := 97 to 122 do
  begin
    (* Append the 3 low bits of this counter to the collected bits. *)
    wo_Bits := (wo_Bits shl 3) or (byar_Temp[by_Letter] and $7);
    inc(by_Count, 3);
    (* Store a Char as soon as 8 or more bits are available.        *)
    if (by_Count >= 8) then
    begin
      dec(by_Count, 8);
      inc(by_Out);
      Char_Temp[by_Out] := chr((wo_Bits shr by_Count) and $FF);
      (* Keep only the bits that have not been stored yet.          *)
      wo_Bits := wo_Bits and ((1 shl by_Count) - 1)
    end
  end;
  (* 26 * 3 = 78 bits: the last 6 bits go left-aligned into the     *)
  (* tenth Char.                                                    *)
  inc(by_Out);
  Char_Temp[by_Out] := chr((wo_Bits shl (8 - by_Count)) and $FF)
end; (* PackBits. *)
(* Variables used by the main Program block. *)
Var
(* Data-buffer on the HEAP; holds the input File contents first and *)
(* the output text later. *)
po_Buffer : po_Buff;
(* by_Index       - loop index over the Words of one anagram-group. *)
(* by_LastAnagram - offset of the last Word of the current group,   *)
(*                  relative to the first Word of the group.        *)
(* by_CurrentWord - offset of the Word whose output line is being   *)
(*                  built.                                          *)
by_Index,
by_LastAnagram,
by_CurrentWord : Byte;
(* wo_Index        - general purpose table index.                   *)
(* wo_ReadIndex    - number of Bytes read from the input File.      *)
(* wo_TableIndex   - number of Words stored in rcar_Table.          *)
(* wo_BufferIndex  - current position in the data-buffer.           *)
(* wo_CurrentIndex - next anagram-group String to copy into the     *)
(*                   data-buffer for output.                        *)
wo_Index,
wo_ReadIndex,
wo_TableIndex,
wo_BufferIndex,
wo_CurrentIndex : Word;
(* end of PART 3 of 7 *)
(* Start of PART 4 of 7 *)
(* Separator text: comma, space, Cr, Lf. *)
st_Temp : st_4;
(* Letter counters of the Word that is currently being read. *)
byar_LtrChk : byar_26;
(* File Variable used for the input File and then the output File. *)
fi_Temp : File;
(* Table of anagram-Word Records. *)
rcar_Table : poar_Word;
(* Table of anagram-group (output line) Records. *)
rcar_Groups : poar_AnaGroup;
(* Main Program execution block.                                    *)
(*                                                                  *)
(* Reads the lower-case Words of WordLIST.DAT, groups them into     *)
(* anagram-groups and Writes one line per Word to SORTED.DAT. Each  *)
(* line starts with the Word, padded with spaces to 12 columns,     *)
(* followed by the other members of its group separated by ', '.    *)
(* The lines are written in alphabetical order of their first Word. *)
(*                                                                  *)
(* Changes compared with the original version:                      *)
(*   - the output loop no longer drops the last line when the       *)
(*     data-buffer fills up with exactly one line left to copy;     *)
(*   - an input File without any Words produces an empty            *)
(*     SORTED.DAT instead of scanning unused table slots;           *)
(*   - the anagram-group scan never looks beyond the last Word;     *)
(*   - too many Words, or a Word that does not fit in st_Word, is   *)
(*     reported instead of overwriting memory;                      *)
(*   - the data-buffer is only read after its index was checked.    *)
(*                                                                  *)
(* NOTE(review): there is still no check that a complete output     *)
(* line fits in st_Group, nor that a group holds fewer than 256     *)
(* Words (by_LastAnagram is a Byte).                                *)
begin
  (* If there is sufficient room, allocate the main data-buffer on  *)
  (* the HEAP, else inform the user and halt the Program.           *)
  if (maxavail > co_MaxSize) then
    new(po_Buffer)
  else
    HeapError;
  (* Clear the data-buffer.                                         *)
  fillChar(po_Buffer^, co_MaxSize, 0);
  (* Allocate and zero-fill one anagram-Word Record for every table *)
  (* slot 1..co_MaxWord.                                            *)
  wo_Index := 0;
  While (co_MaxWord > wo_Index) do
    if (maxavail > sizeof(rc_Word)) then
    begin
      inc(wo_Index);
      new(rcar_Table[wo_Index]);
      fillChar(rcar_Table[wo_Index]^, sizeof(rc_Word), 0);
    end
    else
      HeapError;
  (* Allocate one anagram-group Record for every table slot and     *)
  (* fill it with spaces; the spaces later pad the first Word of    *)
  (* each line.                                                     *)
  wo_Index := 0;
  While (co_MaxWord > wo_Index) do
    if (maxavail > sizeof(rc_AnaGroup)) then
    begin
      inc(wo_Index);
      new(rcar_Groups[wo_Index]);
      fillChar(rcar_Groups[wo_Index]^, sizeof(rc_AnaGroup), 32);
    end
    else
      HeapError;
  (* Attempt to open the File containing the anagram-Words.         *)
  assign(fi_Temp, 'WordLIST.DAT');
  (* Set Filemode to "read-only".                                   *)
  Filemode := 0;
  {$I-}
  reset(fi_Temp, 1);
  {$I+}
  (* Check For I/O errors.                                          *)
  if (ioresult <> 0) then
  begin
    Writeln('Error opening anagram data File ---> WordLIST.DAT');
    halt
  end;
  (* Read the anagram list into the data-buffer in one go. Only the *)
  (* first co_MaxSize Bytes of the File are read.                   *)
  blockread(fi_Temp, po_Buffer^, co_MaxSize, wo_ReadIndex);
(* end of PART 4 of 7 *)
(* Start of PART 5 of 7 *)
  (* Check For I/O errors.                                          *)
  CheckIOerror;
  close(fi_Temp);
  (* Check For I/O errors.                                          *)
  CheckIOerror;
  (* Initialize index Variables.                                    *)
  wo_TableIndex := 0;
  wo_BufferIndex := 0;
  (* Repeat...Until all data in the data-buffer has been processed. *)
  Repeat
    (* Skip forward to the next lower-case letter. The index is     *)
    (* tested first, so the data-buffer is never read beyond the    *)
    (* Bytes that were loaded (short-circuit evaluation, $B-).      *)
    Repeat
      inc(wo_BufferIndex)
    Until (wo_BufferIndex > wo_ReadIndex)
       or ((po_Buffer^[wo_BufferIndex] > 96)
      and (po_Buffer^[wo_BufferIndex] < 123));
    (* If the complete data-buffer has been processed then leave    *)
    (* the Repeat...Until loop.                                     *)
    if (wo_BufferIndex > wo_ReadIndex) then
      break;
    (* Refuse to store more Words than the table can hold.          *)
    if (wo_TableIndex >= co_MaxWord) then
    begin
      Writeln('Too many words in WordLIST.DAT, maximum = ',
              co_MaxWord);
      halt
    end;
    (* Advance the anagram-Word "table" index.                      *)
    inc(wo_TableIndex);
    (* Clear the "letter check" Byte-Array Variable.                *)
    fillChar(byar_LtrChk, sizeof(byar_26), 0);
    (* Repeat...Until not an anagram-Word Character, or the         *)
    (* complete data-buffer has been processed.                     *)
    Repeat
      With rcar_Table[wo_TableIndex]^ do
      begin
        (* Refuse a Word that does not fit in the Word String.      *)
        if (length(st_Word) >= pred(sizeof(st_Word))) then
        begin
          Writeln('Word longer than ', pred(sizeof(st_Word)),
                  ' letters in WordLIST.DAT');
          halt
        end;
        (* Count this letter.                                       *)
        inc(byar_LtrChk[po_Buffer^[wo_BufferIndex]]);
        (* Append the letter to the anagram-Word String.            *)
        inc(st_Word[0]);
        st_Word[ord(st_Word[0])] :=
          chr(po_Buffer^[wo_BufferIndex]);
        (* Advance the data-buffer index.                           *)
        inc(wo_BufferIndex)
      end
    Until (wo_BufferIndex > wo_ReadIndex)
       or (po_Buffer^[wo_BufferIndex] < 97)
       or (po_Buffer^[wo_BufferIndex] > 122);
    (* Pack bits 0,1,2 of each letter counter into the 10 Char key  *)
    (* of the Word. This saves memory and makes the key faster to   *)
    (* compare.                                                     *)
    PackBits(byar_LtrChk, rcar_Table[wo_TableIndex]^.ar_LtrChk);
  Until (wo_BufferIndex > wo_ReadIndex);
  (* Sort the Words alphabetically unless they already are. With    *)
  (* fewer than two Words there is nothing to check or to sort.     *)
  if (wo_TableIndex > 1) then
    if not TableSorted(rcar_Table, 1, wo_TableIndex) then
      QuickSort(poar_Generic(rcar_Table), 1, wo_TableIndex,
                AlphaSort);
  (* Record the alphabetical position of every Word. This is used   *)
  (* as a faster sorting key from here on.                          *)
  For wo_Index := 1 to wo_TableIndex do
    rcar_Table[wo_Index]^.wo_Pos := wo_Index;
(* end of PART 5 of 7 *)
(* Start of PART 6 of 7 *)
  (* QuickSort the "table" of anagram Words, using the Sort1        *)
  (* routine, so that the members of a group become neighbours.     *)
  if (wo_TableIndex > 1) then
    QuickSort(poar_Generic(rcar_Table), 1, wo_TableIndex, Sort1);
  (* Attempt to open a File to Write sorted data to.                *)
  assign(fi_Temp, 'SORTED.DAT');
  {$I-}
  reWrite(fi_Temp, 1);
  (* Check For I/O errors.                                          *)
  CheckIOerror;
  (* Set the temporary String to ', ' + Cr + Lf.                    *)
  st_Temp := ', ' + #13#10;
  (* Start with the first Word of the table.                        *)
  wo_Index := 1;
  (* Process one anagram-group per pass, until all Words are done.  *)
  (* Nothing is done for an empty table.                            *)
  While (wo_Index <= wo_TableIndex) do
  begin
    (* Reset the counter Variables.                                 *)
    by_LastAnagram := 0;
    by_CurrentWord := 0;
    (* While the next Word exists and belongs to the same           *)
    (* anagram-group, advance the by_LastAnagram Variable.          *)
    While ((wo_Index + by_LastAnagram) < wo_TableIndex)
      and (rcar_Table[(wo_Index + by_LastAnagram)]^.ar_LtrChk =
           rcar_Table[succ(wo_Index + by_LastAnagram)]^.ar_LtrChk) do
      inc(by_LastAnagram);
    (* Build one output line for every Word of the group.           *)
    Repeat
      With rcar_Groups[(wo_Index + by_CurrentWord)]^ do
      begin
        (* Start the line with the current Word.                    *)
        move(rcar_Table[(wo_Index + by_CurrentWord)]^.st_Word[1],
             st_Group[1],
             ord(rcar_Table[(wo_Index +
                 by_CurrentWord)]^.st_Word[0]));
        (* Set the length to 12, so the Word is followed by the     *)
        (* spaces the String was filled with.                       *)
        st_Group[0] := #12;
        (* Record the position of the Word that starts the line.    *)
        wo_Pos := rcar_Table[(wo_Index + by_CurrentWord)]^.wo_Pos;
        (* Append every other Word of the group.                    *)
        For by_Index := 0 to by_LastAnagram do
          if (by_Index <> by_CurrentWord) then
          begin
            (* Add the next anagram-Word to the line.               *)
            move(rcar_Table[(wo_Index + by_Index)]^.st_Word[1],
                 st_Group[succ(length(st_Group))],
                 ord(rcar_Table[(wo_Index +
                     by_Index)]^.st_Word[0]));
            (* Record the length of the anagram-Word added.         *)
            inc(st_Group[0],
                ord(rcar_Table[(wo_Index +
                    by_Index)]^.st_Word[0]));
            (* Add a separator when another Word will follow: the   *)
            (* last Word appended is by_LastAnagram, or the one in  *)
            (* front of it when the line itself starts with the     *)
            (* last Word of the group.                              *)
            if ((by_CurrentWord <> by_LastAnagram) and
                (by_Index < by_LastAnagram))
               or (by_Index < pred(by_LastAnagram)) then
            begin
(* end of PART 6 of 7 *)
(* Start of PART 7 of 7 *)
              (* Add the comma and space Characters to the line.    *)
              move(st_Temp[1],
                   st_Group[succ(length(st_Group))], 2);
              inc(st_Group[0], 2)
            end
          end;
        (* Add the Cr + Lf to the line.                             *)
        move(st_Temp[3], st_Group[succ(length(st_Group))], 2);
        inc(st_Group[0], 2);
        (* Advance the current anagram-Word index.                  *)
        inc(by_CurrentWord)
      end
    Until (by_CurrentWord > by_LastAnagram);
    (* Step over the group that was just processed.                 *)
    inc(wo_Index, by_CurrentWord);
  end;
  (* QuickSort the lines on the position of their first Word,       *)
  (* using Sort2.                                                   *)
  if (wo_TableIndex > 1) then
    QuickSort(poar_Generic(rcar_Groups), 1, wo_TableIndex, Sort2);
  (* Start with the first line.                                     *)
  wo_CurrentIndex := 1;
  (* Copy the lines to the data-buffer and Write the buffer to disk *)
  (* each time it is full, until EVERY line has been written. The   *)
  (* test is made on "lines left", so a line that is still pending  *)
  (* when the buffer fills up is written on the next pass.          *)
  While (wo_CurrentIndex <= wo_TableIndex) do
  begin
    (* Start at the beginning of the data-buffer.                   *)
    wo_BufferIndex := 1;
    (* Place as many lines as safely fit in the data-buffer.        *)
    While (wo_CurrentIndex <= wo_TableIndex)
      and (wo_BufferIndex < co_SafeSize) do
      With rcar_Groups[wo_CurrentIndex]^ do
      begin
        (* Place the current line in the data-buffer.               *)
        move(st_Group[1], po_Buffer^[wo_BufferIndex],
             length(st_Group));
        (* Advance the data-buffer index by the line length.        *)
        inc(wo_BufferIndex, length(st_Group));
        (* Advance to the next line.                                *)
        inc(wo_CurrentIndex)
      end;
    (* Write the text in the data-buffer to disk.                   *)
    blockWrite(fi_Temp, po_Buffer^[1], pred(wo_BufferIndex));
    (* Check For I/O errors.                                        *)
    CheckIOerror;
  end;
  (* Close the sorted anagram-Text File.                            *)
  close(fi_Temp);
  (* Check For I/O errors.                                          *)
  CheckIOerror
end.
(* end of PART 7 of 7 *)
{ Hi, to All:
...I gather that the 3rd Programming contest (Anagram Word sort)
is officially over, and am now posting my entry's source-code.
This Program should execute in well under 1 second on a 486-33
ram-disk. (It's about 3.21 sec on my 386sx-25) The final compiled
size of the .EXE is 7360 Bytes.
...I've commented the h*ll out of my source-code, so it's a bit
on the big side.
...Here is a "quick" run-down of how it works:
1- Creates a 60K buffer on the HEAP.
2- Creates an Array table to store all the anagram Words
and data about each Word, on the HEAP.
3- Creates an Array of anagram-group Strings on the HEAP.
4- Read the entire anagram-Word input File WordLIST.DAT
into the 60K buffer in 1 big chunk.
5- Finds all the anagram-Words in the buffer, and assigns
their data to the anagram-Word table on the HEAP.
6- Each letter of every anagram-Word is Recorded in an
Array of 26 Bytes. Then the first 3 bits of each of
the 26 Bytes is packed, so that this data can be
stored in a 10 Character Array in each anagram-Word
table Record. (The bits are packed to save space and
to make the sorting faster.) This method allows for
a maximum of 7 of the same letter in each Word, which
should be sufficient For this contest.
7- The table of anagram Records is then checked to see if
the anagram-Words are in sorted order. (In this contest
the original input File is in sorted order.) If they are
not in sorted order, QuickSort is called to put the
Words (actually Pointers to the Words) in order.
8- Now that the anagram-Words are in sorted order, their
position in the anagram-Word table is Recorded in a
position field within each anagram-Word Record.
9- The table of anagram-Word Records is now sorted using
a multi-key QuickSort. This will sort the anagram-Word
Records by:
1- Length of anagram-Word.
2- Letters that each anagram-Word contains.
3- Alphabetically.
...This multi-key sort will establish the anagram groups,
and sort the members of each group alphabetically.
10- Open the sorted output File.
11- Create N anagram-Strings from the N anagram-Words in each
anagram-group, keeping the anagram Words in each String in
sorted order.
12- QuickSort the anagram-group Strings into alphabetical order.
13- Place all the sorted anagram-group Strings back into the
60K buffer.
14- Write the entire buffer to the SORTED.DAT File, and close
this File.
NOTES: Well this is the first time I've figured out how to do
multi-key QuickSorts, which I wasn't sure was possible
at first.
I also tried using a 32-bit CRC value to identify the
anagram-groups which ran even faster, but should not
be considered a "safe" method, as its accuracy is only
guaranteed For 2-7 Character Words.
File I/O and repetitive loops are usually the big speed
killers in these Types of contests, so I always try to
keep them to a minimum.
...My entry could possibly be tweaked further still,
but I've got a life. <g>
}
[Back to SORTING SWAG index] [Back to Main SWAG index] [Original]