[Back to TEXTFILE SWAG index]  [Back to Main SWAG index]  [Original]

Hi! Someone was needing help speeding up a duplicate line finder.
Here is what I came up with (it's tested, TP 6.0)
It needs the txtSeek unit I'm also posting here. I converted txtSeek
from some code I found here (written in German), hope that person
doesn't mind...

{D-,I-,L-,R-,X+}
unit TxtSeek;
interface

 function TextFilePos(var f:text):LongInt;         {FilePos}
 function TextFileSize(var f:text):LongInt;        {FileSize}
 procedure TextSeek(var f:text;Pos:LongInt);       {Seek}
 procedure TextSeekRel(var f:text; Count:Longint); {Relative Seek}

implementation
uses dos;

const
 sAbs=0;     { for use with DosSeek }
 sRel=1;
 sEnd=2;

function DosSeek(handle:word; posn:LongInt; func:byte):longint;assembler;asm
 mov ah,$42; mov al,func; mov bx,handle;
 mov dx,word ptr posn; mov cx,word ptr posn+2; int $21;
 jnc @S; mov inOutRes,ax; xor ax,ax; xor dx,dx; @S:
 end;

function TextFilePos(var f:text):LongInt;begin
 textFilePos:=DosSeek(Textrec(f).handle,0,sRel)
               -TextRec(f).BufEnd+TextRec(f).BufPos;
 end;

function TextFileSize(var f:text):LongInt;var Temp:LongInt;begin
 case TextRec(f).Mode of
  fmInput:with Textrec(f) do begin
           Temp:=DosSeek(handle, 0, sRel);
           textFileSize:=DosSeek(handle, 0, sEnd);
           DosSeek(handle, Temp, sAbs);
           end;
  fmOutput:textFileSize:=TextFilePos(f);
  else begin
   textFileSize:=0;
   InOutRes:=1;
   end;
  end;
 end;

procedure TextSeek(var f:text; Pos:LongInt);begin
 dosSeek(textRec(f).handle, pos, sAbs);
 textRec(f).bufPos:=textRec(f).bufEnd;  {force read}
 end;

procedure TextSeekRel(var f:text; Count:LongInt);begin
 dosSeek(textRec(f).handle, count, sRel);
 textRec(f).bufPos:=textRec(f).bufEnd;  {force read}
 end;

end.

<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>

{$A-,B-,D-,E-,F-,G+,I-,L-,N-,O-,R-,S-,V-,X+}
{$M $800,$8000,$8000} {require heap memory}
Uses Crt,txtSeek;

type bufType=array[0..32767] of char;  {try this, it's a nice round binary #}
Var
 buff:^bufType;
 f, f2:Text;
 looking,s,parm:String[80];
 n,siz:Longint;
 dupes:word;

Procedure CheckError(Err:integer); Begin
 TextColor(12);
 Case Err Of
  -1: WriteLn('You must specify a file on the command line.');
  2: WriteLn('Can''t find "', parm,'"');
  4: WriteLn('Too many open files to open ', parm);
  3,5..162: WriteLn('Error in reading ', parm);
  End;
 if err<>0 then begin WriteLn; Halt(1);end;
 End;

Begin
 If Paramcount<1 Then CheckError(-1);
 parm:=paramstr(1);
 Assign(f,parm);
 New(buff);
 SetTextBuf(f,buff^);
 Reset(f);
 checkError(IoResult);
 Assign(f2,'FINDDUPE.$$$');
 ReWrite(f2);
 checkError(IoResult);
 siz:=textFileSize(f);
 Writeln('Deleting duplicate lines');
 write('  0% complete');
 n := 0;
 dupes:=0;
 Reset(f);
 While not eof(f) Do Begin
  Readln(f,Looking);
  n:=textFilePos(f);
  repeat
   Readln(f, s);
   until (s=looking) or eof(f);
  if eof(f)then writeln(f2, looking) else inc(dupes);
  Write(^M,(n*100)div siz:3);
  textSeek(f, n);
  End;
 Close(f);
 erase(f);   {erase original file}
 Close(f2);
 rename(f2,parm);  {rename temp file on top of it}
 dispose(buff);
 writeln(^M'Found ',dupes,' duplicates');
 End.


 * OLX 2.2 * This tagline was created with 100% recycled electrons...

--- Maximus 2.01wb
 * Origin: >>> Sun Mountain BBS <<< (303)-665-6922 (1:104/123)
              

[Back to TEXTFILE SWAG index]  [Back to Main SWAG index]  [Original]