[Back to TEXTFILE SWAG index] [Back to Main SWAG index] [Original]
{
Program MAKEDCT, written by Steve Rogers, 1991. Takes an ASCII text
file as ParamStr(1) and creates a "dictionary file" of all the unique
words in the input file. Feel free to use it all you want and mention
my name if you feel like it... :)
}
uses
dos,
strg; { Eagle Performance Software's STRG unit. Shareware version is
available on most TP oriented BBSs. If you use the shareware
version your program will need to be compiled with $E+. }
const
MAXPTR=5000; { Max string pointers before paging out to disk }
type
pS20=^tS20;
tS20=string[20];
var
f : text;
s,
s2 : string;
s_ : array[0..MAXPTR] of pS20;
i,
n : word;
ndx,
NTempFile
: byte;
max : tS20;
{-----------------------}
procedure QSort(Lo,Hi : integer);
(*
Just a generic QuickSort using Eagle's string comparison routines
*)
var
i,j : integer;
x,y: pS20;
begin
i:= lo;
j:= hi;
x:= s_[(lo+hi) div 2];
repeat
while (strcmp(s_[i]^,x^,1,1,255)<0) do
inc(i);
while (strcmp(s_[j]^,x^,1,1,255)>0) do
dec(j);
if (i<=j) then begin
y:= s_[i];
s_[i]:= s_[j];
s_[j]:= y;
inc(i);
dec(j);
end;
until (i>j);
if (lo<j) then
qsort(lo,j);
if (i<hi) then
qsort(i,hi);
end;
{-----------------------}
procedure WriteTempFile;
var
i : word;
tempf : text;
stmp : string;
begin
inc(NTempFile);
qsort(1,n);
writeln('Filing to outfile #',NTempFile);
assign(tempf,'wordtemp.'+strl(NTempFile));
rewrite(tempf);
stmp:= '';
for i:= 1 to n do if (strcmp(s_[i]^,stmp,1,1,255)<>0) then begin
strmov(stmp,s_[i]^);
if (max<stmp) then
strmov(max,stmp);
writeln(tempf,s_[i]^);
end;
n:= 0;
close(tempf);
end;
{-----------------------}
procedure MergeTempFiles;
var
f_ : array[1..50] of text;
outf : text;
i : byte;
s_ : array[1..50] of tS20;
min : tS20;
begin
writeln('Merging ',strl(NTempFile),' temp files');
for i:= 1 to NTempFile do begin
assign(f_[i],'wordtemp.'+strl(i));
reset(f_[i]);
readln(f_[i],s_[i]);
end;
strmov(min,paramstr(1));
strovrl(min,'.DCT',strposr(min,'.',1));
assign(outf,min);
rewrite(outf);
repeat
min:= max;
for i:= 1 to NTempFile do if (s_[i]<min) then
min:= s_[i];
writeln(outf,min);
for i:= 1 to NTempFile do if (s_[i]<=min) then
if not eof(f_[i]) then
readln(f_[i],s_[i])
else
s_[i]:= #254;
until (min=max);
close(outf);
for i:= 1 to NTempFile do begin
close(f_[i]);
erase(f_[i]);
end;
end;
{-----------------------}
procedure StripPunctuation(var s : string);
const
LCCHARS=['a'..'z']; { lower case chars }
var
i : byte;
begin
{ Replace all non-alpha chars with spaces. It's lower case already. }
for i:= 1 to length(s) do
if not (s[i] IN LCCHARS) then
s[i]:= ' ';
{ Remove all double spaces }
while (strqty(s,' ')>0) do
strrepl(s,' ',' ',1,255);
end;
{-----------------------}
begin
NTempFile:= 0;
n:= 0;
max:= '';
for i:= 1 to MAXPTR do
new(s_[i]);
assign(f,paramstr(1));
reset(f);
writeln('Reading');
while not eof(f) do begin
readln(f,s);
strlwr(s);
StripPunctuation(s);
ndx:= 1;
while (ndx<>0) do begin
wrdparse(s2,s,' ',ndx);
inc(n);
strmov(s_[n]^,s2);
if (n=MAXPTR) then
WriteTempFile;
end;
end;
close(f);
WriteTempFile;
if (NTempFile>1) then
MergeTempFiles
else begin
s:= paramstr(1);
strovrl(s,'.DCT',strposr(s,'.',1));
{ Remove output file if it already exists }
if (FSearch(s,'')<>'') then begin
assign(f,s);
erase(f);
end;
assign(f,'wordtemp.1');
rename(f,s);
end;
end.
[Back to TEXTFILE SWAG index] [Back to Main SWAG index] [Original]