/**
 * A utility class containing a method for comparing strings.
 * @author Bill Giel
 * Modified by Derek Bridge, March 2000
 *
 * Comments from Bill Giel:
 * Ported from C by Bill Giel, January 26, 1997
 * Email to: bgiel@ct2.nai.net
 * WWW Site: http://w3.nai.net/~rvdi/bgiel/bill.htm
 * Reference: Zwakenberg, Hans G., Inexact Alphanumeric Comparisons,
 *            The C Users Journal, May 1991
 *
 * The lDistance.isAlike(String requested, String found) method provides
 * provides a way to compare strings and return true if they are similar
 * enough to be considered matches. The method employs a simple metric 
 * known as the Levenstein Distance. This technique is less 'expensive' 
 * than complex phonetic comparisons, and appears to be well suited to 
 * Java programs.
 * The method below will always return false unless at least the first
 * character of each string matches. It will also only compare at most 20
 * characters if the strings are longer than that. If the difference in
 * length between the two strings exceeds the calculated threshold value,
 * the method will also return false.
 *
 * As with the original C function, this code has not been optimized
 * nor has it been benchmarked to provide maximum performance. Any 
 * suggestions to enhance performance would be welcome. 
 *
 * Email to bgiel@ct2.nai.net
 * 
 * Comments from Derek Bridge:
 * Bill Giel made modifications to the original C function by adding an
 * extra test. These have been removed by me. The code is otherwise 
 * unchanged, but has been reformatted and commented.
 */
public class StringUtility
{
   /**
    * Returns true if its parameters are similar strings; false otherwise.
    *
    * @param requested the string we're after.
    * @param found the string we're comparing it to.
    * @return true if the strings are similar; false otherwise.
    */
   public static boolean matches(String requested, String found)
   {  int i, j, r_len, f_len, threshold;
      int[][] distance = new int[ARR_SIZE][ARR_SIZE];
      // If they don't start with the same letter, they're not similar
      if (toupper(requested.charAt(0)) != toupper(found.charAt(0)) )
      {  return REJECTED;
      }
      // How many characters will we compare? (Up to 20, i.e. COMP_LEN)
      r_len = (requested.length() > COMP_LEN ? COMP_LEN : requested.length());
      f_len = (found.length() > COMP_LEN ? COMP_LEN : found.length());
      // The threshold is how many differences we allow before rejection,
      // and the value is roughly 1/4 of r_len
      threshold = (int) Math.floor((double) 1 + (r_len + 2) / 4.0);
      // So if they're already that many characters different in length,
      // they're not similar
      if (Math.abs(r_len - f_len) > threshold)	
      {  return REJECTED;
      }
      // Initialise difference array
      distance[0][0] = 0;
      for (j = 1; j < ARR_SIZE; j++)
      {  distance[0][j] = distance[0][j-1] + ADDITION;
      }
      for(j = 1; j < ARR_SIZE; j++)
      {  distance[j][0] = distance[j-1][0] + DELETION;
      }
      // Find the differences and record in the array
      for (i = 1; i <= r_len; i++)
      {  for(j = 1; j <= f_len; j++)
         {  distance[i][j] = smallestOf(
               distance[i-1][j-1] + 
                  ((toupper(requested.charAt(i - 1)) == 
                    toupper(found.charAt(j - 1))) ? 0 : CHANGE),
               distance[i][j - 1] + ADDITION,
               distance[i - 1][j] + DELETION);
         }
      }
      // Test overall differences agains threshold
      if (distance[r_len][f_len] <= threshold)
      {  return ACCEPTED;
      }
      else
      {  return REJECTED;
      }
   }


   /**
    * Determine smallest of three numbers.
    *
    * @param x any int
    * @param y any int
    * @param z any int
    * @return the smallest of x, y and z.
    */
   private static int smallestOf(int x, int y, int z)
   {  return ((x < y) ? Math.min(x,z) : Math.min(y,z));
   }

   /**
    * Convert characters to uppercase (assumes they're ascii)
    *
    * @param ch an ascii character.
    * @param the corresponding uppercase character.
    */
   private static char toupper(char ch)
   {  if (ch >= 'a' && ch <= 'z')
      {  return (char) (ch - 32);
      }
      else
      {  return ch;
      }
   }

   private static final int COMP_LEN = 20;
   private static final int ARR_SIZE = COMP_LEN + 1;
   private static boolean ACCEPTED = true;
   private static boolean REJECTED = false;

   // These next values may be adjusted based on the language used, and
   // other considerations. The values assigned below seem to work well
   // with English. They have been modified from the original, which was
   // designed for the Dutch language.

   private static final int ADDITION = 1;
   private static final int CHANGE = 2;
   private static final int DELETION = 1;
}




