mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			spellcheck: support transpositions aka Damerau-Levenshtein (PR other/69968)
gcc/fortran/ChangeLog: PR other/69968 * misc.c (gfc_closest_fuzzy_match): Update for renaming of levenshtein_distance to get_edit_distance. gcc/ChangeLog: PR other/69968 * spellcheck-tree.c (levenshtein_distance): Rename to... (get_edit_distance): ...this, and update for underlying renaming. * spellcheck-tree.h (levenshtein_distance): Rename to... (get_edit_distance): ...this. * spellcheck.c (levenshtein_distance): Rename to... (get_edit_distance): ...this. Convert from Levenshtein distance to Damerau-Levenshtein distance by supporting transpositions of adjacent characters. Rename "v1" to "v_next" and "v0" to "v_one_ago". (selftest::levenshtein_distance_unit_test_oneway): Rename to... (selftest::test_edit_distance_unit_test_oneway): ...this, and update for underlying renaming. (selftest::levenshtein_distance_unit_test): Rename to... (selftest::test_get_edit_distance_unit): ...this, and update for underlying renaming. (selftest::test_find_closest_string): Add example from PR 69968 where transposition helps. (selftest::test_metric_conditions): Update for renaming. (selftest::test_metric_conditions): Likewise. (selftest::spellcheck_c_tests): Likewise. * spellcheck.h (levenshtein_distance): Rename both overloads to... (get_edit_distance): ...this. (best_match::consider): Update for renaming. gcc/testsuite/ChangeLog: PR other/69968 * gcc.dg/spellcheck-transposition.c: New test. From-SVN: r261521
This commit is contained in:
		
							parent
							
								
									e3329a782f
								
							
						
					
					
						commit
						b80a188bee
					
				|  | @ -1,3 +1,30 @@ | ||||||
|  | 2018-06-12  David Malcolm  <dmalcolm@redhat.com> | ||||||
|  | 
 | ||||||
|  | 	PR other/69968 | ||||||
|  | 	* spellcheck-tree.c (levenshtein_distance): Rename to... | ||||||
|  | 	(get_edit_distance): ...this, and update for underlying renaming. | ||||||
|  | 	* spellcheck-tree.h (levenshtein_distance): Rename to... | ||||||
|  | 	(get_edit_distance): ...this. | ||||||
|  | 	* spellcheck.c (levenshtein_distance): Rename to... | ||||||
|  | 	(get_edit_distance): ...this.  Convert from Levenshtein distance | ||||||
|  | 	to Damerau-Levenshtein distance by supporting transpositions of | ||||||
|  | 	adjacent characters.  Rename "v1" to "v_next" and "v0" to | ||||||
|  | 	"v_one_ago". | ||||||
|  | 	(selftest::levenshtein_distance_unit_test_oneway): Rename to... | ||||||
|  | 	(selftest::test_edit_distance_unit_test_oneway): ...this, and | ||||||
|  | 	update for underlying renaming. | ||||||
|  | 	(selftest::levenshtein_distance_unit_test): Rename to... | ||||||
|  | 	(selftest::test_get_edit_distance_unit): ...this, and update for | ||||||
|  | 	underlying renaming. | ||||||
|  | 	(selftest::test_find_closest_string): Add example from PR 69968 | ||||||
|  | 	where transposition helps. | ||||||
|  | 	(selftest::test_metric_conditions): Update for renaming. | ||||||
|  | 	(selftest::test_metric_conditions): Likewise. | ||||||
|  | 	(selftest::spellcheck_c_tests): Likewise. | ||||||
|  | 	* spellcheck.h (levenshtein_distance): Rename both overloads to... | ||||||
|  | 	(get_edit_distance): ...this. | ||||||
|  | 	(best_match::consider): Update for renaming. | ||||||
|  | 
 | ||||||
| 2018-06-12  Martin Sebor  <msebor@redhat.com> | 2018-06-12  Martin Sebor  <msebor@redhat.com> | ||||||
| 
 | 
 | ||||||
| 	PR tree-optimization/85259 | 	PR tree-optimization/85259 | ||||||
|  |  | ||||||
|  | @ -1,3 +1,9 @@ | ||||||
|  | 2018-06-12  David Malcolm  <dmalcolm@redhat.com> | ||||||
|  | 
 | ||||||
|  | 	PR other/69968 | ||||||
|  | 	* misc.c (gfc_closest_fuzzy_match): Update for renaming of | ||||||
|  | 	levenshtein_distance to get_edit_distance. | ||||||
|  | 
 | ||||||
| 2018-06-12  Steven G. Kargl  <kargl@gcc.gnu.org> | 2018-06-12  Steven G. Kargl  <kargl@gcc.gnu.org> | ||||||
| 
 | 
 | ||||||
| 	PR fortran/44491 | 	PR fortran/44491 | ||||||
|  |  | ||||||
|  | @ -286,7 +286,7 @@ get_c_kind(const char *c_kind_name, CInteropKind_t kinds_table[]) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| /* For a given name TYPO, determine the best candidate from CANDIDATES
 | /* For a given name TYPO, determine the best candidate from CANDIDATES
 | ||||||
|    perusing Levenshtein distance.  Frees CANDIDATES before returning.  */ |    using get_edit_distance.  Frees CANDIDATES before returning.  */ | ||||||
| 
 | 
 | ||||||
| const char * | const char * | ||||||
| gfc_closest_fuzzy_match (const char *typo, char **candidates) | gfc_closest_fuzzy_match (const char *typo, char **candidates) | ||||||
|  | @ -299,7 +299,7 @@ gfc_closest_fuzzy_match (const char *typo, char **candidates) | ||||||
| 
 | 
 | ||||||
|   while (cand && *cand) |   while (cand && *cand) | ||||||
|     { |     { | ||||||
|       edit_distance_t dist = levenshtein_distance (typo, tl, *cand, |       edit_distance_t dist = get_edit_distance (typo, tl, *cand, | ||||||
| 	  strlen (*cand)); | 	  strlen (*cand)); | ||||||
|       if (dist < best_distance) |       if (dist < best_distance) | ||||||
| 	{ | 	{ | ||||||
|  |  | ||||||
|  | @ -27,15 +27,15 @@ along with GCC; see the file COPYING3.  If not see | ||||||
| #include "selftest.h" | #include "selftest.h" | ||||||
| #include "stringpool.h" | #include "stringpool.h" | ||||||
| 
 | 
 | ||||||
| /* Calculate Levenshtein distance between two identifiers.  */ | /* Calculate edit distance between two identifiers.  */ | ||||||
| 
 | 
 | ||||||
| edit_distance_t | edit_distance_t | ||||||
| levenshtein_distance (tree ident_s, tree ident_t) | get_edit_distance (tree ident_s, tree ident_t) | ||||||
| { | { | ||||||
|   gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE); |   gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE); | ||||||
|   gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE); |   gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE); | ||||||
| 
 | 
 | ||||||
|   return levenshtein_distance (IDENTIFIER_POINTER (ident_s), |   return get_edit_distance (IDENTIFIER_POINTER (ident_s), | ||||||
| 			    IDENTIFIER_LENGTH (ident_s), | 			    IDENTIFIER_LENGTH (ident_s), | ||||||
| 			    IDENTIFIER_POINTER (ident_t), | 			    IDENTIFIER_POINTER (ident_t), | ||||||
| 			    IDENTIFIER_LENGTH (ident_t)); | 			    IDENTIFIER_LENGTH (ident_t)); | ||||||
|  |  | ||||||
|  | @ -25,7 +25,7 @@ along with GCC; see the file COPYING3.  If not see | ||||||
| /* spellcheck-tree.c  */ | /* spellcheck-tree.c  */ | ||||||
| 
 | 
 | ||||||
| extern edit_distance_t | extern edit_distance_t | ||||||
| levenshtein_distance (tree ident_s, tree ident_t); | get_edit_distance (tree ident_s, tree ident_t); | ||||||
| 
 | 
 | ||||||
| extern tree | extern tree | ||||||
| find_closest_identifier (tree target, const auto_vec<tree> *candidates); | find_closest_identifier (tree target, const auto_vec<tree> *candidates); | ||||||
|  |  | ||||||
							
								
								
									
										137
									
								
								gcc/spellcheck.c
								
								
								
								
							
							
						
						
									
										137
									
								
								gcc/spellcheck.c
								
								
								
								
							|  | @ -25,14 +25,17 @@ along with GCC; see the file COPYING3.  If not see | ||||||
| #include "spellcheck.h" | #include "spellcheck.h" | ||||||
| #include "selftest.h" | #include "selftest.h" | ||||||
| 
 | 
 | ||||||
| /* The Levenshtein distance is an "edit-distance": the minimal
 | /* Get the edit distance between the two strings: the minimal
 | ||||||
|    number of one-character insertions, removals or substitutions |    number of edits that are needed to change one string into another, | ||||||
|    that are needed to change one string into another. |    where edits can be one-character insertions, removals, or substitutions, | ||||||
|  |    or transpositions of two adjacent characters (counting as one "edit"). | ||||||
| 
 | 
 | ||||||
|    This implementation uses the Wagner-Fischer algorithm.  */ |    This implementation uses the Wagner-Fischer algorithm for the | ||||||
|  |    Damerau-Levenshtein distance; specifically, the "optimal string alignment | ||||||
|  |    distance" or "restricted edit distance" variant.  */ | ||||||
| 
 | 
 | ||||||
| edit_distance_t | edit_distance_t | ||||||
| levenshtein_distance (const char *s, int len_s, | get_edit_distance (const char *s, int len_s, | ||||||
| 		   const char *t, int len_t) | 		   const char *t, int len_t) | ||||||
| { | { | ||||||
|   const bool debug = false; |   const bool debug = false; | ||||||
|  | @ -49,76 +52,86 @@ levenshtein_distance (const char *s, int len_s, | ||||||
|     return len_s; |     return len_s; | ||||||
| 
 | 
 | ||||||
|   /* We effectively build a matrix where each (i, j) contains the
 |   /* We effectively build a matrix where each (i, j) contains the
 | ||||||
|      Levenshtein distance between the prefix strings s[0:j] |      distance between the prefix strings s[0:j] and t[0:i]. | ||||||
|      and t[0:i]. |  | ||||||
|      Rather than actually build an (len_t + 1) * (len_s + 1) matrix, |      Rather than actually build an (len_t + 1) * (len_s + 1) matrix, | ||||||
|      we simply keep track of the last row, v0 and a new row, v1, |      we simply keep track of the last two rows, v_one_ago and v_two_ago, | ||||||
|      which avoids an (len_t + 1) * (len_s + 1) allocation and memory accesses |      and a new row, v_next, which avoids an (len_t + 1) * (len_s + 1) | ||||||
|      in favor of two (len_s + 1) allocations.  These could potentially be |      allocation and memory accesses in favor of three (len_s + 1) | ||||||
|  |      allocations.  These could potentially be | ||||||
|      statically-allocated if we impose a maximum length on the |      statically-allocated if we impose a maximum length on the | ||||||
|      strings of interest.  */ |      strings of interest.  */ | ||||||
|   edit_distance_t *v0 = new edit_distance_t[len_s + 1]; |   edit_distance_t *v_two_ago = new edit_distance_t[len_s + 1]; | ||||||
|   edit_distance_t *v1 = new edit_distance_t[len_s + 1]; |   edit_distance_t *v_one_ago = new edit_distance_t[len_s + 1]; | ||||||
|  |   edit_distance_t *v_next = new edit_distance_t[len_s + 1]; | ||||||
| 
 | 
 | ||||||
|   /* The first row is for the case of an empty target string, which
 |   /* The first row is for the case of an empty target string, which
 | ||||||
|      we can reach by deleting every character in the source string.  */ |      we can reach by deleting every character in the source string.  */ | ||||||
|   for (int i = 0; i < len_s + 1; i++) |   for (int i = 0; i < len_s + 1; i++) | ||||||
|     v0[i] = i; |     v_one_ago[i] = i; | ||||||
| 
 | 
 | ||||||
|   /* Build successive rows.  */ |   /* Build successive rows.  */ | ||||||
|   for (int i = 0; i < len_t; i++) |   for (int i = 0; i < len_t; i++) | ||||||
|     { |     { | ||||||
|       if (debug) |       if (debug) | ||||||
| 	{ | 	{ | ||||||
| 	  printf ("i:%i v0 = ", i); | 	  printf ("i:%i v_one_ago = ", i); | ||||||
| 	  for (int j = 0; j < len_s + 1; j++) | 	  for (int j = 0; j < len_s + 1; j++) | ||||||
| 	    printf ("%i ", v0[j]); | 	    printf ("%i ", v_one_ago[j]); | ||||||
| 	  printf ("\n"); | 	  printf ("\n"); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|       /* The initial column is for the case of an empty source string; we
 |       /* The initial column is for the case of an empty source string; we
 | ||||||
| 	 can reach prefixes of the target string of length i | 	 can reach prefixes of the target string of length i | ||||||
| 	 by inserting i characters.  */ | 	 by inserting i characters.  */ | ||||||
|       v1[0] = i + 1; |       v_next[0] = i + 1; | ||||||
| 
 | 
 | ||||||
|       /* Build the rest of the row by considering neighbors to
 |       /* Build the rest of the row by considering neighbors to
 | ||||||
| 	 the north, west and northwest.  */ | 	 the north, west and northwest.  */ | ||||||
|       for (int j = 0; j < len_s; j++) |       for (int j = 0; j < len_s; j++) | ||||||
| 	{ | 	{ | ||||||
| 	  edit_distance_t cost = (s[j] == t[i] ? 0 : 1); | 	  edit_distance_t cost = (s[j] == t[i] ? 0 : 1); | ||||||
| 	  edit_distance_t deletion     = v1[j] + 1; | 	  edit_distance_t deletion     = v_next[j] + 1; | ||||||
| 	  edit_distance_t insertion    = v0[j + 1] + 1; | 	  edit_distance_t insertion    = v_one_ago[j + 1] + 1; | ||||||
| 	  edit_distance_t substitution = v0[j] + cost; | 	  edit_distance_t substitution = v_one_ago[j] + cost; | ||||||
| 	  edit_distance_t cheapest = MIN (deletion, insertion); | 	  edit_distance_t cheapest = MIN (deletion, insertion); | ||||||
| 	  cheapest = MIN (cheapest, substitution); | 	  cheapest = MIN (cheapest, substitution); | ||||||
| 	  v1[j + 1] = cheapest; | 	  if (i > 0 && j > 0 && s[j] == t[i - 1] && s[j - 1] == t[i]) | ||||||
|  | 	    { | ||||||
|  | 	      edit_distance_t transposition = v_two_ago[j - 1] + 1; | ||||||
|  | 	      cheapest = MIN (cheapest, transposition); | ||||||
|  | 	    } | ||||||
|  | 	  v_next[j + 1] = cheapest; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|       /* Prepare to move on to next row.  */ |       /* Prepare to move on to next row.  */ | ||||||
|       for (int j = 0; j < len_s + 1; j++) |       for (int j = 0; j < len_s + 1; j++) | ||||||
| 	v0[j] = v1[j]; | 	{ | ||||||
|  | 	  v_two_ago[j] = v_one_ago[j]; | ||||||
|  | 	  v_one_ago[j] = v_next[j]; | ||||||
|  | 	} | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|   if (debug) |   if (debug) | ||||||
|     { |     { | ||||||
|       printf ("final v1 = "); |       printf ("final v_next = "); | ||||||
|       for (int j = 0; j < len_s + 1; j++) |       for (int j = 0; j < len_s + 1; j++) | ||||||
| 	printf ("%i ", v1[j]); | 	printf ("%i ", v_next[j]); | ||||||
|       printf ("\n"); |       printf ("\n"); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|   edit_distance_t result = v1[len_s]; |   edit_distance_t result = v_next[len_s]; | ||||||
|   delete[] v0; |   delete[] v_two_ago; | ||||||
|   delete[] v1; |   delete[] v_one_ago; | ||||||
|  |   delete[] v_next; | ||||||
|   return result; |   return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Calculate Levenshtein distance between two nil-terminated strings.  */ | /* Get the edit distance between two nil-terminated strings.  */ | ||||||
| 
 | 
 | ||||||
| edit_distance_t | edit_distance_t | ||||||
| levenshtein_distance (const char *s, const char *t) | get_edit_distance (const char *s, const char *t) | ||||||
| { | { | ||||||
|   return levenshtein_distance (s, strlen (s), t, strlen (t)); |   return get_edit_distance (s, strlen (s), t, strlen (t)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to
 | /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to
 | ||||||
|  | @ -155,29 +168,28 @@ namespace selftest { | ||||||
| 
 | 
 | ||||||
| /* Selftests.  */ | /* Selftests.  */ | ||||||
| 
 | 
 | ||||||
| /* Verify that the levenshtein_distance (A, B) equals the expected
 | /* Verify that get_edit_distance (A, B) equals the expected value.  */ | ||||||
|    value.  */ |  | ||||||
| 
 | 
 | ||||||
| static void | static void | ||||||
| levenshtein_distance_unit_test_oneway (const char *a, const char *b, | test_edit_distance_unit_test_oneway (const char *a, const char *b, | ||||||
| 				    edit_distance_t expected) | 				    edit_distance_t expected) | ||||||
| { | { | ||||||
|   edit_distance_t actual = levenshtein_distance (a, b); |   edit_distance_t actual = get_edit_distance (a, b); | ||||||
|   ASSERT_EQ (actual, expected); |   ASSERT_EQ (actual, expected); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Verify that both
 | /* Verify that both
 | ||||||
|      levenshtein_distance (A, B) |      get_edit_distance (A, B) | ||||||
|    and |    and | ||||||
|      levenshtein_distance (B, A) |      get_edit_distance (B, A) | ||||||
|    equal the expected value, to ensure that the function is symmetric.  */ |    equal the expected value, to ensure that the function is symmetric.  */ | ||||||
| 
 | 
 | ||||||
| static void | static void | ||||||
| levenshtein_distance_unit_test (const char *a, const char *b, | test_get_edit_distance_unit (const char *a, const char *b, | ||||||
| 			     edit_distance_t expected) | 			     edit_distance_t expected) | ||||||
| { | { | ||||||
|   levenshtein_distance_unit_test_oneway (a, b, expected); |   test_edit_distance_unit_test_oneway (a, b, expected); | ||||||
|   levenshtein_distance_unit_test_oneway (b, a, expected); |   test_edit_distance_unit_test_oneway (b, a, expected); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Verify that find_closest_string is sane.  */ | /* Verify that find_closest_string is sane.  */ | ||||||
|  | @ -215,6 +227,16 @@ test_find_closest_string () | ||||||
|      it as a suggestion will be nonsensical.  Verify that we don't offer such |      it as a suggestion will be nonsensical.  Verify that we don't offer such | ||||||
|      suggestions.  */ |      suggestions.  */ | ||||||
|   ASSERT_EQ (NULL, find_closest_string ("banana", &candidates)); |   ASSERT_EQ (NULL, find_closest_string ("banana", &candidates)); | ||||||
|  | 
 | ||||||
|  |   /* Example from PR 69968 where transposition helps.  */ | ||||||
|  |   candidates.truncate (0); | ||||||
|  |   candidates.safe_push("coordx"); | ||||||
|  |   candidates.safe_push("coordy"); | ||||||
|  |   candidates.safe_push("coordz"); | ||||||
|  |   candidates.safe_push("coordx1"); | ||||||
|  |   candidates.safe_push("coordy1"); | ||||||
|  |   candidates.safe_push("coordz1"); | ||||||
|  |   ASSERT_STREQ ("coordz1", find_closest_string ("coorzd1", &candidates)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Test data for test_metric_conditions.  */ | /* Test data for test_metric_conditions.  */ | ||||||
|  | @ -227,7 +249,7 @@ static const char * const test_data[] = { | ||||||
|   "1234567890123456789012345678901234567890123456789012345678901234567890" |   "1234567890123456789012345678901234567890123456789012345678901234567890" | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /* Verify that levenshtein_distance appears to be a sane distance function,
 | /* Verify that get_edit_distance appears to be a sane distance function,
 | ||||||
|    i.e. the conditions for being a metric.  This is done directly for a |    i.e. the conditions for being a metric.  This is done directly for a | ||||||
|    small set of examples, using test_data above.  This is O(N^3) in the size |    small set of examples, using test_data above.  This is O(N^3) in the size | ||||||
|    of the array, due to the test for the triangle inequality, so we keep the |    of the array, due to the test for the triangle inequality, so we keep the | ||||||
|  | @ -243,7 +265,7 @@ test_metric_conditions () | ||||||
|       for (int j = 0; j < num_test_cases; j++) |       for (int j = 0; j < num_test_cases; j++) | ||||||
| 	{ | 	{ | ||||||
| 	  edit_distance_t dist_ij | 	  edit_distance_t dist_ij | ||||||
| 	    = levenshtein_distance (test_data[i], test_data[j]); | 	    = get_edit_distance (test_data[i], test_data[j]); | ||||||
| 
 | 
 | ||||||
| 	  /* Identity of indiscernibles: d(i, j) == 0 iff i == j.  */ | 	  /* Identity of indiscernibles: d(i, j) == 0 iff i == j.  */ | ||||||
| 	  if (i == j) | 	  if (i == j) | ||||||
|  | @ -253,43 +275,54 @@ test_metric_conditions () | ||||||
| 
 | 
 | ||||||
| 	  /* Symmetry: d(i, j) == d(j, i).  */ | 	  /* Symmetry: d(i, j) == d(j, i).  */ | ||||||
| 	  edit_distance_t dist_ji | 	  edit_distance_t dist_ji | ||||||
| 	    = levenshtein_distance (test_data[j], test_data[i]); | 	    = get_edit_distance (test_data[j], test_data[i]); | ||||||
| 	  ASSERT_EQ (dist_ij, dist_ji); | 	  ASSERT_EQ (dist_ij, dist_ji); | ||||||
| 
 | 
 | ||||||
| 	  /* Triangle inequality.  */ | 	  /* Triangle inequality.  */ | ||||||
| 	  for (int k = 0; k < num_test_cases; k++) | 	  for (int k = 0; k < num_test_cases; k++) | ||||||
| 	    { | 	    { | ||||||
| 	      edit_distance_t dist_ik | 	      edit_distance_t dist_ik | ||||||
| 		= levenshtein_distance (test_data[i], test_data[k]); | 		= get_edit_distance (test_data[i], test_data[k]); | ||||||
| 	      edit_distance_t dist_jk | 	      edit_distance_t dist_jk | ||||||
| 		= levenshtein_distance (test_data[j], test_data[k]); | 		= get_edit_distance (test_data[j], test_data[k]); | ||||||
| 	      ASSERT_TRUE (dist_ik <= dist_ij + dist_jk); | 	      ASSERT_TRUE (dist_ik <= dist_ij + dist_jk); | ||||||
| 	    } | 	    } | ||||||
| 	} | 	} | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /* Verify levenshtein_distance for a variety of pairs of pre-canned
 | /* Verify get_edit_distance for a variety of pairs of pre-canned
 | ||||||
|    inputs, comparing against known-good values.  */ |    inputs, comparing against known-good values.  */ | ||||||
| 
 | 
 | ||||||
| void | void | ||||||
| spellcheck_c_tests () | spellcheck_c_tests () | ||||||
| { | { | ||||||
|   levenshtein_distance_unit_test ("", "nonempty", strlen ("nonempty")); |   test_get_edit_distance_unit ("", "nonempty", strlen ("nonempty")); | ||||||
|   levenshtein_distance_unit_test ("saturday", "sunday", 3); |   test_get_edit_distance_unit ("saturday", "sunday", 3); | ||||||
|   levenshtein_distance_unit_test ("foo", "m_foo", 2); |   test_get_edit_distance_unit ("foo", "m_foo", 2); | ||||||
|   levenshtein_distance_unit_test ("hello_world", "HelloWorld", 3); |   test_get_edit_distance_unit ("hello_world", "HelloWorld", 3); | ||||||
|   levenshtein_distance_unit_test |   test_get_edit_distance_unit | ||||||
|     ("the quick brown fox jumps over the lazy dog", "dog", 40); |     ("the quick brown fox jumps over the lazy dog", "dog", 40); | ||||||
|   levenshtein_distance_unit_test |   test_get_edit_distance_unit | ||||||
|     ("the quick brown fox jumps over the lazy dog", |     ("the quick brown fox jumps over the lazy dog", | ||||||
|      "the quick brown dog jumps over the lazy fox", |      "the quick brown dog jumps over the lazy fox", | ||||||
|      4); |      4); | ||||||
|   levenshtein_distance_unit_test |   test_get_edit_distance_unit | ||||||
|     ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,", |     ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,", | ||||||
|      "All your base are belong to us", |      "All your base are belong to us", | ||||||
|      44); |      44); | ||||||
|   levenshtein_distance_unit_test ("foo", "FOO", 3); |   test_get_edit_distance_unit ("foo", "FOO", 3); | ||||||
|  |   test_get_edit_distance_unit ("fee", "deed", 2); | ||||||
|  |   test_get_edit_distance_unit ("coorzd1", "coordx1", 2); | ||||||
|  | 
 | ||||||
|  |   /* Examples where transposition helps.  */ | ||||||
|  |   test_get_edit_distance_unit ("ab", "ba", 1); | ||||||
|  |   test_get_edit_distance_unit ("ba", "abc", 2); | ||||||
|  |   test_get_edit_distance_unit ("coorzd1", "coordz1", 1); | ||||||
|  |   test_get_edit_distance_unit ("abcdefghijklmnopqrstuvwxyz", | ||||||
|  | 			       "bacdefghijklmnopqrstuvwxzy", 2); | ||||||
|  |   test_get_edit_distance_unit ("saturday", "sundya", 4); | ||||||
|  |   test_get_edit_distance_unit ("signed", "singed", 1); | ||||||
| 
 | 
 | ||||||
|   test_find_closest_string (); |   test_find_closest_string (); | ||||||
|   test_metric_conditions (); |   test_metric_conditions (); | ||||||
|  |  | ||||||
|  | @ -25,11 +25,11 @@ const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; | ||||||
| 
 | 
 | ||||||
| /* spellcheck.c  */ | /* spellcheck.c  */ | ||||||
| extern edit_distance_t | extern edit_distance_t | ||||||
| levenshtein_distance (const char *s, int len_s, | get_edit_distance (const char *s, int len_s, | ||||||
| 		   const char *t, int len_t); | 		   const char *t, int len_t); | ||||||
| 
 | 
 | ||||||
| extern edit_distance_t | extern edit_distance_t | ||||||
| levenshtein_distance (const char *s, const char *t); | get_edit_distance (const char *s, const char *t); | ||||||
| 
 | 
 | ||||||
| extern const char * | extern const char * | ||||||
| find_closest_string (const char *target, | find_closest_string (const char *target, | ||||||
|  | @ -73,7 +73,7 @@ struct edit_distance_traits<const char *> | ||||||
| 
 | 
 | ||||||
|    This type accumulates the best possible match against GOAL_TYPE for |    This type accumulates the best possible match against GOAL_TYPE for | ||||||
|    a sequence of elements of CANDIDATE_TYPE, whilst minimizing the |    a sequence of elements of CANDIDATE_TYPE, whilst minimizing the | ||||||
|    number of calls to levenshtein_distance and to |    number of calls to get_edit_distance and to | ||||||
|    edit_distance_traits<T>::get_length.  */ |    edit_distance_traits<T>::get_length.  */ | ||||||
| 
 | 
 | ||||||
| template <typename GOAL_TYPE, typename CANDIDATE_TYPE> | template <typename GOAL_TYPE, typename CANDIDATE_TYPE> | ||||||
|  | @ -126,7 +126,7 @@ class best_match | ||||||
|     /* Otherwise, compute the distance and see if the candidate
 |     /* Otherwise, compute the distance and see if the candidate
 | ||||||
|        has beaten the previous best value.  */ |        has beaten the previous best value.  */ | ||||||
|     edit_distance_t dist |     edit_distance_t dist | ||||||
|       = levenshtein_distance (m_goal, m_goal_len, |       = get_edit_distance (m_goal, m_goal_len, | ||||||
| 			   candidate_traits::get_string (candidate), | 			   candidate_traits::get_string (candidate), | ||||||
| 			   candidate_len); | 			   candidate_len); | ||||||
|     if (dist < m_best_distance) |     if (dist < m_best_distance) | ||||||
|  |  | ||||||
|  | @ -1,3 +1,8 @@ | ||||||
|  | 2018-06-12  David Malcolm  <dmalcolm@redhat.com> | ||||||
|  | 
 | ||||||
|  | 	PR other/69968 | ||||||
|  | 	* gcc.dg/spellcheck-transposition.c: New test. | ||||||
|  | 
 | ||||||
| 2018-06-12  Steven G. Kargl  <kargl@gcc.gnu.org> | 2018-06-12  Steven G. Kargl  <kargl@gcc.gnu.org> | ||||||
| 
 | 
 | ||||||
| 	PR fortran/44491 | 	PR fortran/44491 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,20 @@ | ||||||
|  | /* PR other/69968.  */ | ||||||
|  | 
 | ||||||
|  | struct { | ||||||
|  |   int coordx, coordy, coordz; | ||||||
|  |   int coordx1, coordy1, coordz1; | ||||||
|  | } c; | ||||||
|  | 
 | ||||||
|  | /* Consider the misspelling "coorzd1".
 | ||||||
|  | 
 | ||||||
|  |    With Levenshtein distance, the misspelling has an edit distance of 2 | ||||||
|  |    to all 6 of the fields (e.g. via a deletion and a substitution for the | ||||||
|  |    first three, and via deletion and insertion for the second three). | ||||||
|  |     | ||||||
|  |    With Damerau-Levenshtein, the misspelling has an edit distance of 1 | ||||||
|  |    via transposition to "coordz1", and 2 to the other fields.  */ | ||||||
|  | 
 | ||||||
|  | void foo (void) | ||||||
|  | { | ||||||
|  |   c.coorzd1 = c.coordy; /* { dg-error "has no member named 'coorzd1'; did you mean 'coordz1'" } */ | ||||||
|  | } | ||||||
		Loading…
	
		Reference in New Issue
	
	 David Malcolm
						David Malcolm