1 |
/* |
2 |
** Copyright (C) 1999 OpenSA Project |
3 |
** |
4 |
* Reference: Adapted from Knuth, D.E. (1973) The art of computer programming; |
5 |
* Volume 3: Sorting and searching. Addison-Wesley Publishing Company: |
6 |
* Reading, Mass. Page 392. |
7 |
* |
8 |
* 1. Retain the first letter of the name, and drop all occurrences of |
9 |
* a, e, h, i, o, u, w, y in other positions. |
10 |
* |
11 |
* 2. Assign the following numbers to the remaining letters after the first: |
12 |
* b, f, p, v -> 1 l -> 4 |
13 |
* c, g, j, k, q, s, x, z -> 2 m, n -> 5 |
14 |
* d, t -> 3 r -> 6 |
15 |
* |
16 |
* 3. If two or more letters with the same code were adjacent in the original |
17 |
* name (before step 1), omit all but the first. |
18 |
* |
19 |
* 4. Convert to the form ``letter, digit, digit, digit'' by adding trailing |
20 |
* zeros (if there are less than three digits), or by dropping rightmost |
21 |
* digits (if there are more than three). |
22 |
* |
23 |
* The examples given in the book are: |
24 |
* |
25 |
* Euler, Ellery E460 |
26 |
* Gauss, Ghosh G200 |
27 |
* Hilbert, Heilbronn H416 |
28 |
* Knuth, Kant K530 |
29 |
* Lloyd, Ladd L300 |
30 |
* Lukasiewicz, Lissajous L222 |
31 |
* |
32 |
* Most algorithms fail in two ways: |
33 |
* 1. they omit adjacent letters with the same code AFTER step 1, not before. |
34 |
* 2. they do not omit adjacent letters with the same code at the beginning |
35 |
* of the name. |
36 |
* |
37 |
*/ |
38 |
|
39 |
#include "swish.h" |
40 |
#include <stdio.h> |
41 |
#include <stdlib.h> |
42 |
#include <string.h> |
43 |
#include <ctype.h> |
44 |
|
45 |
#include "soundex.h" |
46 |
|
47 |
int soundex(word) |
48 |
char *word; /* in/out: Target word */ |
49 |
{ |
50 |
/* Misc Stuff */ |
51 |
char u, l ; |
52 |
int i, j, n; |
53 |
/* Resultant Sound Code */ |
54 |
char soundCode[5] = "0000\0"; |
55 |
/* Group Number Lookup Table */ |
56 |
static char soundTable[26] = |
57 |
{0, /* A */ |
58 |
'1', /* B */ |
59 |
'2', /* C */ |
60 |
'3', /* D */ |
61 |
0, /* E */ |
62 |
'1', /* F */ |
63 |
'2', /* G */ |
64 |
0, /* H */ |
65 |
0, /* I */ |
66 |
'2', /* J */ |
67 |
'2', /* K */ |
68 |
'4', /* L */ |
69 |
'5', /* M */ |
70 |
'5', /* N */ |
71 |
0, /* O */ |
72 |
'1', /* P */ |
73 |
'2', /* Q */ |
74 |
'6', /* R */ |
75 |
'2', /* S */ |
76 |
'3', /* T */ |
77 |
0, /* U */ |
78 |
'1', /* V */ |
79 |
0, /* W */ |
80 |
'2', /* X */ |
81 |
0, /* Y */ |
82 |
'2'}; /* Z */ |
83 |
#ifdef _DEBUG |
84 |
/* Debug to console */ |
85 |
printf("# %15s: %s ", "soundex.c", word); |
86 |
#endif |
87 |
|
88 |
/* Make sure it actually starts with a letter */ |
89 |
if(!isalpha((int)((unsigned char)word[0]))) return soundXit(); |
90 |
#ifdef _DEBUG |
91 |
/* Debug to console */ |
92 |
printf("isalpha, "); |
93 |
#endif |
94 |
|
95 |
/* Get string length and make sure its at least 3 characters */ |
96 |
if((n = (int)strlen(word)) < 3) return soundXit(); |
97 |
#ifdef _DEBUG |
98 |
/* Debug to console */ |
99 |
printf("=>3, "); |
100 |
#endif |
101 |
|
102 |
/* Convert chars to lower case and strip non-letter chars */ |
103 |
j = 0; |
104 |
for (i = 0; i < n; i++) { |
105 |
u = tolower((unsigned char)word[i]); |
106 |
if ((u > 96) && (u < 123)) { |
107 |
word[j] = u; |
108 |
j++; |
109 |
} |
110 |
} |
111 |
|
112 |
/* terminate string */ |
113 |
word[j] = 0; |
114 |
|
115 |
/* String length again */ |
116 |
n = strlen(word); |
117 |
|
118 |
soundCode[0] = word[0]; |
119 |
|
120 |
/* remember first char */ |
121 |
l = soundTable[((word[0]) - 97)]; |
122 |
|
123 |
j = 1; |
124 |
|
125 |
/* build soundex string */ |
126 |
for (i = 1; i < n && j < 4; i++) { |
127 |
u = soundTable[((word[i]) - 97)]; |
128 |
|
129 |
if (u != l) { |
130 |
if (u != 0) { |
131 |
soundCode[(int) j++] = u; |
132 |
} |
133 |
l = u; |
134 |
} |
135 |
} |
136 |
strcpy(word, soundCode); |
137 |
#ifdef _DEBUG |
138 |
/* Debug to console */ |
139 |
printf("-> \"%s\"\n", word); |
140 |
#endif |
141 |
|
142 |
return(1); |
143 |
} |
144 |
|
145 |
int soundXit(void) |
146 |
{ |
147 |
#ifdef _DEBUG |
148 |
printf("was left as is...\n"); |
149 |
#endif |
150 |
return(1); |
151 |
} |