/**
 * A utility class containing a method for comparing strings.
 * @author Bill Giel
 * Modified by Derek Bridge, March 2000
 *
 * Comments from Bill Giel:
 * Ported from C by Bill Giel, January 26, 1997
 * Email to: bgiel@ct2.nai.net
 * WWW Site: http://w3.nai.net/~rvdi/bgiel/bill.htm
 * Reference: Zwakenberg, Hans G., Inexact Alphanumeric Comparisons,
 *            The C Users Journal, May 1991
 *
 * The lDistance.isAlike(String requested, String found) method provides
 * provides a way to compare strings and return true if they are similar
 * enough to be considered matches. The method employs a simple metric 
 * known as the Levenstein Distance. This technique is less 'expensive' 
 * than complex phonetic comparisons, and appears to be well suited to 
 * Java programs.
 * The method below will always return false unless at least the first
 * character of each string matches. It will also only compare at most 20
 * characters if the strings are longer than that. If the difference in
 * length between the two strings exceeds the calculated threshold value,
 * the method will also return false.
 *
 * As with the original C function, this code has not been optimized
 * nor has it been benchmarked to provide maximum performance. Any 
 * suggestions to enhance performance would be welcome. 
 *
 * Email to bgiel@ct2.nai.net
 * 
 * Comments from Derek Bridge:
 * Bill Giel made modifications to the original C function by adding an
 * extra test. These have been removed by me. The code is otherwise 
 * unchanged, but has been reformatted and commented.
 */
public class StringUtility
{
   /**
    * Returns true if its parameters are similar strings; false otherwise.
    *
    * @param requested the string we're after.
    * @param found the string we're comparing it to.
    * @return true if the strings are similar; false otherwise.
    */
   public static boolean matches(String requested, String found)
   {  int i, j, r_len, f_len, threshold;
      int[][] distance = new int[ARR_SIZE][ARR_SIZE];
      /* If they don't start with the same letter, they're not similar
       */
      if (toupper(requested.charAt(0)) != toupper(found.charAt(0)) )
      {  return REJECTED;
      }
      /* How many characters will we compare? (Up to 20, i.e. COMP_LEN)
       */
      r_len = (requested.length() > COMP_LEN ? COMP_LEN : requested.length());
      f_len = (found.length() > COMP_LEN ? COMP_LEN : found.length());
      /* The threshold is how many differences we allow before rejection,
         and the value is roughly 1/4 of r_len
       */
      threshold = (int) Math.floor((double) 1 + (r_len + 2) / 4.0);
      /* So if they're already that many characters different in length,
         they're not similar
       */
      if (Math.abs(r_len - f_len) > threshold)	
      {  return REJECTED;
      }
      /* Initialise difference array
       */
      distance[0][0] = 0;
      for (j = 1; j < ARR_SIZE; j++)
      {  distance[0][j] = distance[0][j-1] + ADDITION;
      }
      for(j = 1; j < ARR_SIZE; j++)
      {  distance[j][0] = distance[j-1][0] + DELETION;
      }
      /* Find the differences and record in the array
       */
      for (i = 1; i <= r_len; i++)
      {  for(j = 1; j <= f_len; j++)
         {  distance[i][j] = smallestOf(
               distance[i-1][j-1] + 
                  ((toupper(requested.charAt(i - 1)) == 
                    toupper(found.charAt(j - 1))) ? 0 : CHANGE),
               distance[i][j - 1] + ADDITION,
               distance[i - 1][j] + DELETION);
         }
      }
      /* Test overall differences agains threshold
       */
      if (distance[r_len][f_len] <= threshold)
      {  return ACCEPTED;
      }
      else
      {  return REJECTED;
      }
   }


   /**
    * Determine smallest of three numbers.
    *
    * @param x any int
    * @param y any int
    * @param z any int
    * @return the smallest of x, y and z.
    */
   private static int smallestOf(int x, int y, int z)
   {  return ((x < y) ? Math.min(x,z) : Math.min(y,z));
   }

   /**
    * Convert characters to uppercase (assumes they're ascii)
    *
    * @param ch an ascii character.
    * @param the corresponding uppercase character.
    */
   private static char toupper(char ch)
   {  if (ch >= 'a' && ch <= 'z')
      {  return (char) (ch - 32);
      }
      else
      {  return ch;
      }
   }

   private static final int COMP_LEN = 20;
   private static final int ARR_SIZE = COMP_LEN + 1;
   private static boolean ACCEPTED = true;
   private static boolean REJECTED = false;

   /* These next values may be adjusted based on the language used, and
      other considerations. The values assigned below seem to work well
      with English. They have been modified from the original, which was
      designed for the Dutch language.
    */

   private static final int ADDITION = 1;
   private static final int CHANGE = 2;
   private static final int DELETION = 1;
}




