[Back to STRINGS SWAG index]  [Back to Main SWAG index]  [Original]

uses crt,dos;
var infile:text;
    instring,st1,st2,st3:string;
    letter:string[1];
    j:integer;
    h,m,s,hund,h2,m2,s2,hund2:Word;
    avail,avail2:longint;

FUNCTION SOUNDEXxx(na:string):string;
{fine, not too fast, converted from BASIC routine, has gotos}
var i,e,valcode,value:integer;
    k,ee,l,cd:string;
const
    code:string='01230120022455012623010202';
               { ABCDEFGHIJKLMNOPQRSTUVWXYZ }
label 312,314;

begin
   l:='';
   k:='';
   cd:='';
   if length(na)<2 then goto 314;
   for i:= 2 to length(na) do
   begin
      na[i]:=upcase(na[i]);
      if na[i] in ['A' .. 'Z'] then e:=ord(na[i])-64 else e:=0;
      if (e>26) or (e<1) then goto 312;
      k:=copy(code,e,1);
      if (k=l) or (k='0') then goto 312;
      cd:=concat(cd,k);
      if length(cd) >2 then goto 314;
312:  l:=k;
   end;
314:  cd:=concat(cd,'0000');
      delete(cd,4,30);
      soundexxx:=cd;
end; { SOUNDEXxx }



FUNCTION SOUNDEX3(na:string):string;
{same as soundexxx without gotos, faster}
var i,e,ll:integer;
    l,cd,k:string;

const
    code : string = '01230120022455012623010202';
    letters:string= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
begin
     l:='';
     k:='';
     cd:='';
     if length(na)<2 then
     begin
        soundex3:='000';
        exit;
     end;
     i:=2;
     ll:=length(na);
     repeat
        na[i]:=upcase(na[i]);
        if na[i] in ['A'..'Z'] then e:=ord(na[i])-64 else e:=0;
        if e>0 then
        begin
           k:=copy(code,e,1);
           if (k<>l)and(k<>'0') then
           begin
              cd:=cd+k;
              if length(cd)>2 then i:=ll+1;
           end;
        end;
        l:=k;
        inc(i);
     until i>ll;
     cd:=cd+'000';
     soundex3:=copy(cd,1,3);
end; { SOUNDEX3 }



FUNCTION SOUNDEX3b(na:string):string;
{same as soundexxx without gotos, fastest}
var i,p,ll:integer;
    l,k,j:char;
    cd:string[3];
const code:string='901230120022455012623010202';
    letters:string='ABCDEFGHIJKLMNOPQRSTUVWXYZ';

begin
   l:=#0;
   k:=#0;
   j:=#0;
   p:=1;
   cd:='000';
   if length(na)<2 then
   begin
      soundex3b:=cd;
      exit;
   end;
   i:=2;
   ll:=length(na);
   repeat
      j:=code[succ(pos(upcase(na[i]),letters))];
      if (j<>'9')then k:=j;
      if (k<>l)and(k<>'0') then
      begin
         cd[p]:=k;
         inc(p);
         if p>3 then i:=ll+1;
      end;
      l:=k;
      inc(i);
   until i>ll;
   soundex3b:=cd;
end; { SOUNDEX3b }


function soundex_asm(var S:string):string;assembler;
const trans:array[0..25]of byte=
  (0,1,2,3,0,1,2,0,0,2,2,4,5,5,0,1,2,6,2,3,0,1,0,2,0,2);
 { a b c d e f g h i j k l m n o p q r s t u v w x y z }
asm
   cld              {set direction}
   les di,@result   {pointer to output soundex code}
   Xor ax,ax
   mov bx,di
   add bx,3         {bx=pointer last char of soundex}
   mov al,3
   stosb            {length of result}
   mov al,'0'
   push di
   mov cx,3
   repnz stosb      {pad soundex with '000'}
   pop di           {points to first byte of soundex code}
   lds si,[S]       {pointer to input string}
   Xor ax,ax
   mov al,[si]      {length of input string}
   cmp al,1         {input must be at least 2 characters long}
   jbe @quitter     {too short, or null input string - bail}
   add ax,si
   mov dx,ax        {dx=pointer last byte S}
   inc si
   inc si           {si=pointer second byte S}
                    {dx=lastchar s}
                    {bx=lastchar result}
                    {si=secondchar s}
                    {di=firstchar result}
                    {cx=last letter code rememberers}
   mov cx,0
 @nextchar:
   xor ax,ax
   lodsb            {get next char from input}
   cmp al,'Z'       {check for upper case}
   jg  @CaseOK
   cmp al,'A'
   jl  @CaseOK
   or al,$20        {make lower case}
@CaseOK:
   cmp al,'z'       {check for alphabetical range}
   jg @nocode
   cmp al,'a'
   jl @nocode
   sub al,'a'       {shift down so 'a'=0 for translation offset}
   push bx          {save pointer}
   mov bx,offset trans
   xlat             {get translation value}
   pop bx           {retreive end of input string pointer}
   mov ch,al
   cmp al,0
   je @nocode
   cmp ch,cl
   je @nocode
   add al,'0'
   stosb          {put soundex in code}
@nocode:
   mov cl,ch
   cmp di,bx
   jg @quitter
   cmp si,dx
   jbe @nextchar
 @quitter:
end;



function soundex_asm2(var S:string):string;assembler;
{works without global variable}
asm
   jmp @start
@trans: DB 0,1,2,3,0,1,2,0,0,2,2,4,5,5,0,1,2,6,2,3,0,1,0,2,0,2
         { a b c d e f g h i j k l m n o p q r s t u v w x y z }
@start:
   push ds
   cld              {set direction}
   les di,@result   {pointer to output soundex code}
   Xor ax,ax
   mov bx,di
   add bx,3         {bx=pointer last char of soundex}
   mov al,3
   stosb            {length of result}
   mov al,'0'
   push di
   mov cx,3
   repnz stosb      {pad soundex with '000'}
   pop di           {points to first byte of soundex code}
   lds si,S       {pointer to input string}
   Xor ax,ax
   lodsb
  { mov al,[si]}      {length of input string}
   cmp al,1         {input must be at least 2 characters long}
   jbe @quitter     {too short, or null input string - bail}
   add ax,si
   mov dx,ax        {dx=pointer last byte S}
   dec dx
{   inc si}
   inc si           {si=pointer second byte S}
                    {dx=lastchar s}
                    {bx=lastchar result}
                    {si=secondchar s}
                    {di=firstchar result}
                    {cx=last letter code rememberers}
   mov cx,0
 @nextchar:
   xor ax,ax
   lodsb            {get next char from input}
   cmp al,'Z'       {check for upper case}
   jg  @CaseOK
   cmp al,'A'
   jl  @CaseOK
   or al,$20        {make lower case}
@CaseOK:
   cmp al,'z'       {check for alphabetical range}
   jg @nocode
   cmp al,'a'
   jl @nocode
   sub al,'a'       {shift down so 'a'=0 for translation offset}
   push bx          {save pointer}
   mov bx,offset @trans
   SEGCS xlat             {get translation value}
   pop bx           {retreive end of input string pointer}
   mov ch,al
   cmp al,0
   je @nocode
   cmp ch,cl
   je @nocode
   add al,'0'
   stosb          {put soundex in code}
@nocode:
   mov cl,ch
   cmp di,bx
   jg @quitter
   cmp si,dx
   jbe @nextchar
 @quitter:
   pop ds
end;



function experiment(var s:string):string;
begin
   experiment:=soundex_asm2(s);
end;


procedure compare;
var istr:string;
begin
   write(letter,',');
   while not eof(infile) do
   begin
      readln(infile,instring);
      if letter[1]<>upcase(instring[1]) then
      begin
         letter[1]:=upcase(instring[1]);
         write(letter,',');
      end;
      istr:=instring;
      st2:=soundexxx(instring);
      if soundex3(instring)<>st2 then write('sx3 ');
      if soundex3b(instring)<>st2 then write('sx3b ');
{      if soundex_asm2(instring)<>st2 then write('sxasm ');}
      if experiment(instring)<>st2 then write('sxasm');
      st1:=soundex3b(instring);
      if(st1<>st2) then
         writeln(instring,' ',st1,' ',st2);
      if istr<>instring then writeln(istr,'  ',instring);
   end;
   writeln;
end;

procedure speed;
var t1,t2:real;
begin
   writeln('timing soundexxx');
   close(infile);
   reset(infile);
   GetTime(h,m,s,hund);
   while not eof(infile)do
   begin
      readln(infile,instring);
      st1:=soundexxx(instring);
   end;
   gettime(h2,m2,s2,hund2);
   t1:=(h*3600)+(m*60)+s+(hund/100);
   t2:=(h2*3600)+(m2*60)+s2+(hund2/100);
   WriteLn('Elapsed time ',(t2-t1):0:2,' seconds');
   writeln;
   writeln('timing soundex3');
   close(infile);
   reset(infile);
   GetTime(h,m,s,hund);
   while not eof(infile)do
   begin
      readln(infile,instring);
      st1:=soundex3(instring);
   end;
   gettime(h2,m2,s2,hund2);
   t1:=(h*3600)+(m*60)+s+(hund/100);
   t2:=(h2*3600)+(m2*60)+s2+(hund2/100);
   WriteLn('Elapsed time ',(t2-t1):0:2,' seconds');
   writeln;

   writeln('timing soundex3b');
   close(infile);
   reset(infile);
   GetTime(h,m,s,hund);
   while not eof(infile)do
   begin
      readln(infile,instring);
      st1:=soundex3b(instring);
   end;
   gettime(h2,m2,s2,hund2);
   t1:=(h*3600)+(m*60)+s+(hund/100);
   t2:=(h2*3600)+(m2*60)+s2+(hund2/100);
   WriteLn('Elapsed time ',(t2-t1):0:2,' seconds');
   writeln;
   writeln('timing soundex_asm');
   close(infile);
   reset(infile);
   GetTime(h,m,s,hund);
   while not eof(infile)do
   begin
      readln(infile,instring);
      st1:=soundex_asm(instring);
   end;
   gettime(h2,m2,s2,hund2);
   t1:=(h*3600)+(m*60)+s+(hund/100);
   t2:=(h2*3600)+(m2*60)+s2+(hund2/100);
   WriteLn('Elapsed time ',(t2-t1):0:2,' seconds');
end;



begin
   clrscr;
   letter:='A';
   assign(infile,'d:\spell\tmp\wookdic.asc');
   reset(infile);
   instring:='accord';
   st1:=soundex_asm(instring);
   compare;
{   speed;}
   close(infile);
end.

[Back to STRINGS SWAG index]  [Back to Main SWAG index]  [Original]