[Back to STRINGS SWAG index]  [Back to Main SWAG index]  [Original]

{
    Here is a function some of you may find useful, it tries to find
misspellings such as "Hello World" and "Hello Wolrd" by comparing 2 strings
and returning a percent base of 3 tests of how close they match...
Lemme know if you know of a way to improve upon this.
}

{$B-,V-,S-,R-,I-,A+}       { for speed }
 
uses dos, crt;
 
var
   string_a  : string;
   string_b  : string;    { for the 5 line example at the bottom }
 
 
 
 
{------------------------------------------------------------------------}
{ 'InStr' -For use with StrMatcher.                                      }
{                                                                        }
{     InStr is just like POS except you may specify a starting position. }
{                                                                        }
{------------------------------------------------------------------------}
function InStr(index : byte; var string1, string2 : string) : byte;
var
   tempstring : string;
begin
   tempstring := copy(string2, index, length(string2)-index);
   InStr := pos(string1, tempstring) + index - 1;
end;
 
 
 
 
{------------------------------------------------------------------------}
{ 'StrMatcher' -String Matching Procedure, Written by Kevin Currie, '94  }
{                                                                        }
{     StrMatcher accepts two pointers and (w/o case sensitivity) tries   }
{  to determine how well their strings match.  It then returns a percent }
{  value of its tests into a shortint.                                   }
{                                                                        }
{------------------------------------------------------------------------}
function strmatcher(var string1, string2 : string) : shortint;
var
   strn1, strn2, tmpstr1, tmpstr2 : string;
   len1, len2, short : byte;
   stest, which, loop : longint;
   postest1, postest2 : integer;
   perc1, perc2, perc3 : real;
   retval : shortint;
label
   string_match_100,
   string_match_len;
begin
   { ---===> Don't yell at me about the goto's, they are there for speed  }
   {         and clarity.  (It's clearer than a HUGE block under an if)   }
 
   { ---===> UpperCase the strings to see if that is where the difference }
   {         lies, and also to make the other comparisons easier.         }
 
   strn1 := string1;
   len1 := length(string1);                 { I make backup copies        }
   for loop := 1 to len1 do                 { because var is just another }
      strn1[loop] := upcase(strn1[loop]);   { way of saying pointer...    }
   strn2 := string2;                        { In other words if I didn't  }
   len2 := length(string2);                 { I would modify the original }
   for loop := 1 to len2 do                 { strings...                  }
      strn2[loop] := upcase(strn2[loop]);
 
 
   { ---===> See of the capitalized strings match }
   if (strn1 = strn2) then
   begin
      retval := 100;
      goto string_match_100;
   end; {if}
 
   { ---===> Test 1 checks the occurence of chars from string1     }
   {         against the chars in string2                          }
   stest := 0;
   for loop := 1 to len1 do
   begin
      tmpstr1 := strn1[loop];
      if (pos(tmpstr1, strn2) > 0) then inc(stest);
   end; {for}
   perc2 := stest / len1;
   stest := 0;
   for loop := 1 to len2 do
   begin
      tmpstr2 := strn2[loop];
      if (pos(tmpstr2, strn1) > 0) then inc(stest);
   end; {for}
   perc3 := stest / len2;
   perc1 := (perc3 + perc2) / 2;
   if (perc1 < 0) then perc1 := 0;

   { ---===> Test 2 Adds the Values of all the charcters in the    }
   {         string and then takes a percent of 1 vs 2.            }
 
   stest := 0;
   which := 0;
   for loop := 1 to len1 do                { ---===> the shl 4's and the  }
      stest := stest + ord(strn1[loop]);   {         shr 2's below are to }
   stest := stest shl 4;                   {         add some more weight }
   for loop := 1 to len2 do                {         to the difference.   }
      which := which + ord(strn2[loop]);
   which := which shl 4;
   loop := stest shr 2;
   if (which > stest) then loop := which shr 2;
   perc2 := 1 - (abs(stest - which) / loop);
   if (perc2 < 0) then perc2 := 0;
 
   { ---===> Test 3 checks the character position differences between  }
   {         the two strings.                                          }
   {                                                                   }
   {         NOTE:  A string being shorter than another can cause this }
   {                test to fail quite badly so null characters are    }
   {                placed in the shorter string where there are char  }
   {                mismatches until the strings are equal in length.  }
 
   if (len1 = len2) then goto string_match_len;
 
   tmpstr1 := '';
   tmpstr2 := '';
      loop :=  1;
 
   if (len1 > len2) then
   begin
      short := len1 - len2;
      which := 2;
   end else
   begin
      short := len2 - len1;
      which := 1;
   end; {if/else}
 
   while (short <> 0) do
   begin
      if (strn1[loop] = strn2[loop]) then
      begin
         case which of
            1:   tmpstr1 := tmpstr1 + strn2[loop];
            2:   tmpstr1 := tmpstr1 + strn1[loop];
         end; {case}
      end else
      begin
         case which of
            1:
            begin
               tmpstr1 := tmpstr1 + #0;
               tmpstr2 := copy(strn1, loop, (len1-loop)+1);
                 strn1 := concat(tmpstr1, tmpstr2);
               dec(short);
            end; {case1}
            2:
            begin
               tmpstr1 := tmpstr1 + #0;
               tmpstr2 := copy(strn2, loop, (len2-loop)+1);
                 strn2 := concat(tmpstr1, tmpstr2);
               dec(short);
            end; {case2}
         end; {case}
      end; {if/else}
      inc(loop);
   end; {while}
 
   len1 := length(strn1);      { ---===> Reset these after the loop that }
   len2 := length(strn2);      {         makes them the same length.     }
 
 string_match_len: {label}
 
   { ---===> Now that we have the string lengths the same lets check the }
   {         character positions.                                        }
 
   stest := 0;
   for loop := 1 to len1 do
      stest := stest + loop + loop - 1;
   which := stest;
   for loop := 1 to len1 do
   begin
      tmpstr1  := strn1[loop];
      tmpstr2  := strn2[loop];
      postest1 :=  len1 - abs(instr(loop, tmpstr2, strn1));
      postest2 :=  len2 - abs(instr(loop, tmpstr2, strn2));
      stest    := stest - (postest1 + postest2);
   end;
   stest := which - abs(stest);
   which := which + (len1 div 2);
   perc3 := stest / which;
   if (perc3 < 0) then perc3 := 0;
 
   { ---===> Average the results of the 3 tests.         }
   {         They are weighted hence the 80, 10 and 10.  }
 
   retval := trunc(((perc1 * 80) + (perc2 * 10) + (perc3 * 10)));
 
string_match_100: {label}
 
   strmatcher := retval; { ---===> Return Percent Difference. }
end; {StrMatcher}
 
 
 
 
begin       { ---===> Stupid 5 line example. }
   clrscr;
   string_a := 'Hello World';
   string_b := 'Hello Wolrd';
   writeln('String Match Percent:', strmatcher(string_a, string_b):5);
   readln;
end. {main} { ---===> hey, I use C also :-)  }


[Back to STRINGS SWAG index]  [Back to Main SWAG index]  [Original]