[Back to FINDREPL SWAG index]  [Back to Main SWAG index]  [Original]

{ BOB SWART

Here it is, all new and much faster. I used an internal binary tree to manage
the taglines. The number of taglines you can store is limited only by the
available RAM:
}

{$A+,B-,D-,E-,F-,G-,I-,L-,N-,O-,P-,Q-,R-,S+,T-,V-,X-}
{$M 16384,0,655360}
Uses
  Crt;
Type
  TBuffer  = Array[0..$4000] of Char;

Const
  { Banner and command-line help shown at start-up. }
  Title = 'TagLines 0.2 by Bob Swart For Travis Griggs'#13#10;
  Usage = 'Usage: TagLines inFile outFile'#13#10#13#10+
          '       Taglines will remove duplicate lines from inFile.'#13#10+
          '       Resulting Text is placed in outFile.'#13#10;

  NumLines: LongInt = 0; { total number of lines in InFile }
  NmLdiv80: LongInt = 0; { NumLines div 80, For 'progress' }
  CurrentL: LongInt = 0; { current lineno read from InFile }

Type
  String80 = String[80];

  PBinTree = ^TBinTree;
  TBinTree = Record
               Info: String80;
               left,right: PBinTree
             end;

Var
  InBuf,
  OutBuf   : TBuffer;
  InFile,
  OutFile  : Text;
  TagLine  : String80;
  Root,
  Current,
  Prev     : PBinTree;
  i        : Integer;
  SaveExit : Pointer;


Function CompStr(Var Name1,Name2: String): Integer; Assembler;
{ Author: drs. Robert E. Swart

  Three-way comparison of two length-prefixed Pascal strings.

  Returns  0 when Name1 = Name2,
          -1 when Name1 < Name2,
           1 when Name1 > Name2.

  The first min(Length(Name1), Length(Name2)) characters are compared as
  unsigned bytes. If they all match, the two length bytes decide the result,
  so a string that is a proper prefix of the other sorts first.
  Both strings are only read, never written. DS is saved and restored
  because it is reloaded to address Name1.
}
Asm
  push  DS
  lds   SI,Name1               { ds:si pts to Name1       }
  les   DI,Name2               { es:di pts to Name2       }
  cld
  lodsb                        { get String1 length in AL }
  mov   AH,ES:[DI]             { get String2 length in AH }
  inc   DI
  mov   BX,AX                  { save both lengths in BX:
                                 BL = len1, BH = len2     }
  xor   CX,CX                  { clear cx                 }
  mov   CL,AL                  { get String1 length in CX }
  cmp   CL,AH                  { equal to String2 length? }
  jb    @Len                   { CX stores minimum length }
  mov   CL,AH                  { of String1 and String2   }
 @Len: jcxz  @Exit                  { nothing to compare: let
                                      the lengths decide    }

 @Loop: lodsb                        { String1[i] in AL         }
  mov   AH,ES:[DI]             { String2[i] in AH         }
  cmp   AL,AH                  { compare Str1 to Str2     }
  jne   @Not                   { leave loop on mismatch   }
  inc   DI
  loop  @Loop                  { go do next Char          }
  jmp   @Exit                  { common prefix equal; BX
                                 still holds the lengths  }

 @Not: mov   BX,AX                  { BL = AL = String1[i],
                                 BH = AH = String2[i]     }
 @Exit: xor   AX,AX                 { result starts at 0       }
  cmp   BL,BH                  { length or contents comp  }
  je    @Equal                 { 1 = 2: return  0         }
  jb    @Lower                 { 1 < 2: return -1         }
  inc   AX                     { 1 > 2: return  1         }
  inc   AX                     { (0 + 2, then falls into
                                 the dec below => 1)      }
 @Lower: dec   AX              { 0 - 1 => -1 when jumped to }
 @Equal: pop   DS
end {CompStr};

Procedure Stop; Far;
{ Exit handler: the main program stores its address in ExitProc, so it runs
  on every termination after that point (normal end or Halt).
  It first puts back the previously saved handler so the exit chain can
  continue, then closes both text files. }
Begin
  { Re-link the handler that was active before this one was installed. }
  ExitProc := SaveExit;

  { Release the input file, then close the output file. }
  Close(InFile);
  Close(OutFile);
End;


{ Main program.

  1. Validates the command line (exactly two parameters).
  2. Opens inFile for reading and refuses to run if outFile already exists.
  3. Counts the lines of inFile so that progress can be shown.
  4. Reads inFile again line by line. Every line is looked up in a binary
     search tree ordered by CompStr; lines not yet in the tree are inserted
     and written to outFile, lines already present are dropped.

  Exit codes: 1 = cannot open inFile, 2 = outFile exists,
              3 = cannot create outFile, 4 = out of heap memory,
              5 = I/O error while processing.
  A key press during step 4 stops the program early through Halt. }
begin
  Writeln(Title);
  if Paramcount <> 2 then
  begin
    Writeln(Usage);
    Halt
  end;

  Assign(InFile,ParamStr(1));
  SetTextBuf(InFile,InBuf);
  Reset(InFile);
  if IOResult <> 0 then
  begin
    WriteLn('Error: could not open ', ParamStr(1));
    Halt(1)
  end;

  { Existence test for the output file: a successful Reset means the file is
    already there, and it must not be overwritten. }
  Assign(OutFile,ParamStr(2));
  SetTextBuf(OutFile,OutBuf);
  Reset(OutFile);
  if IOResult = 0 then
  begin
    WriteLn('Error: File ', ParamStr(2),' already exists');
    { Stop is not installed yet, so release both files here. }
    Close(OutFile);
    Close(InFile);
    Halt(2)
  end;

  ReWrite(OutFile);
  if IOResult <> 0 then
  begin
    WriteLn('Error: could not create ', ParamStr(2));
    Close(InFile);
    Halt(3)
  end;

  { From here on both files are open; Stop closes them on any exit. }
  SaveExit := ExitProc;
  ExitProc := @Stop;

  { First pass: count the lines. }
  While not eof(InFile) do
  begin
    readln(InFile);
    Inc(NumLines);
  end;
  Writeln('There are ',NumLines,' lines in this File.'#13#10);
  Writeln('Press any key to stop the search For duplicate lines');

  { One progress mark per NmLdiv80 lines. Files shorter than 80 lines would
    give a divisor of 0 (division by zero in the mod below), so clamp it
    to 1; such files simply get one mark per line. }
  NmLdiv80 := NumLines div 80;
  if NmLdiv80 = 0 then
    NmLdiv80 := 1;

  { Second pass: filter duplicates through the tree. }
  Root := nil;
  reset(InFile);
  While CurrentL <> NumLines do
  begin
    if KeyPressed then
      Halt { calls Stop };
    Inc(CurrentL);
    if (CurrentL mod NmLdiv80) = 0 then
      Write('#');
    readln(InFile,TagLine);

    { Walk down the tree looking for TagLine. Prev trails one node behind
      Current and ends up as the parent of the insertion point. i starts
      non-zero so that an empty tree counts as "not found". }
    Prev := nil;
    Current := Root;
    i := 1;
    While (Current <> nil) and (i <> 0) do
    begin
      Prev := Current;
      i := CompStr(Current^.Info,TagLine);
      if i > 0 then
        Current := Current^.left
      else
      if i < 0 then
        Current := Current^.right
    end;

    if i <> 0 then { TagLine not found: new unique line }
    begin
      { Fail with a message instead of a heap-overflow runtime error. }
      if MaxAvail < SizeOf(TBinTree) then
      begin
        WriteLn(#13#10'Error: out of memory at line ',CurrentL);
        Halt(4) { calls Stop }
      end;

      New(Current);
      Current^.left := nil;
      Current^.right := nil;
      Current^.Info := TagLine;

      if Prev = nil then
        Root := Current { first TagLine becomes the root }
      else
      if i > 0 then
        Prev^.left := Current { Current before Prev }
      else
        Prev^.right := Current { Current after Prev };
      Writeln(OutFile,TagLine)
    end;

    { I/O checking is off, so a failed read or write (e.g. disk full) would
      otherwise go unnoticed and the run would still claim success. }
    if IOResult <> 0 then
    begin
      WriteLn(#13#10'Error: I/O failure while processing line ',CurrentL);
      Halt(5) { calls Stop }
    end
  end;
  Writeln(#13#10'100% Completed, result is in File ',ParamStr(2))
  { close is done by Stop }
end.

{
> I also tried DJ's idea of the buffer of 65535 but it said the structure
> was too large. So I used 64512.
Always try to use a multiple of 4K, because the hard disk 'eats' space in these
chunks. Reading/Writing in these chunks goes a lot faster that way.
}

[Back to FINDREPL SWAG index]  [Back to Main SWAG index]  [Original]