1 |
adcroft |
1.1 |
/* |
2 |
|
|
** Copyright (C) 1999 OpenSA Project |
3 |
|
|
** |
4 |
|
|
* Reference: Adapted from Knuth, D.E. (1973) The art of computer programming; |
5 |
|
|
* Volume 3: Sorting and searching. Addison-Wesley Publishing Company: |
6 |
|
|
* Reading, Mass. Page 392. |
7 |
|
|
* |
8 |
|
|
* 1. Retain the first letter of the name, and drop all occurrences of |
9 |
|
|
* a, e, h, i, o, u, w, y in other positions. |
10 |
|
|
* |
11 |
|
|
* 2. Assign the following numbers to the remaining letters after the first: |
12 |
|
|
* b, f, p, v -> 1 l -> 4 |
13 |
|
|
* c, g, j, k, q, s, x, z -> 2 m, n -> 5 |
14 |
|
|
* d, t -> 3 r -> 6 |
15 |
|
|
* |
16 |
|
|
* 3. If two or more letters with the same code were adjacent in the original |
17 |
|
|
* name (before step 1), omit all but the first. |
18 |
|
|
* |
19 |
|
|
* 4. Convert to the form ``letter, digit, digit, digit'' by adding trailing |
20 |
|
|
* zeros (if there are less than three digits), or by dropping rightmost |
21 |
|
|
* digits (if there are more than three). |
22 |
|
|
* |
23 |
|
|
* The examples given in the book are: |
24 |
|
|
* |
25 |
|
|
* Euler, Ellery E460 |
26 |
|
|
* Gauss, Ghosh G200 |
27 |
|
|
* Hilbert, Heilbronn H416 |
28 |
|
|
* Knuth, Kant K530 |
29 |
|
|
* Lloyd, Ladd L300 |
30 |
|
|
* Lukasiewicz, Lissajous L222 |
31 |
|
|
* |
32 |
|
|
* Most algorithms fail in two ways: |
33 |
|
|
* 1. they omit adjacent letters with the same code AFTER step 1, not before. |
34 |
|
|
* 2. they do not omit adjacent letters with the same code at the beginning |
35 |
|
|
* of the name. |
36 |
|
|
* |
37 |
|
|
*/ |
38 |
|
|
|
39 |
|
|
#include "swish.h" |
40 |
|
|
#include <stdio.h> |
41 |
|
|
#include <stdlib.h> |
42 |
|
|
#include <string.h> |
43 |
|
|
#include <ctype.h> |
44 |
|
|
|
45 |
|
|
#include "soundex.h" |
46 |
|
|
|
47 |
|
|
int soundex(word) |
48 |
|
|
char *word; /* in/out: Target word */ |
49 |
|
|
{ |
50 |
|
|
/* Misc Stuff */ |
51 |
|
|
char u, l ; |
52 |
|
|
int i, j, n; |
53 |
|
|
/* Resultant Sound Code */ |
54 |
|
|
char soundCode[5] = "0000\0"; |
55 |
|
|
/* Group Number Lookup Table */ |
56 |
|
|
static char soundTable[26] = |
57 |
|
|
{0, /* A */ |
58 |
|
|
'1', /* B */ |
59 |
|
|
'2', /* C */ |
60 |
|
|
'3', /* D */ |
61 |
|
|
0, /* E */ |
62 |
|
|
'1', /* F */ |
63 |
|
|
'2', /* G */ |
64 |
|
|
0, /* H */ |
65 |
|
|
0, /* I */ |
66 |
|
|
'2', /* J */ |
67 |
|
|
'2', /* K */ |
68 |
|
|
'4', /* L */ |
69 |
|
|
'5', /* M */ |
70 |
|
|
'5', /* N */ |
71 |
|
|
0, /* O */ |
72 |
|
|
'1', /* P */ |
73 |
|
|
'2', /* Q */ |
74 |
|
|
'6', /* R */ |
75 |
|
|
'2', /* S */ |
76 |
|
|
'3', /* T */ |
77 |
|
|
0, /* U */ |
78 |
|
|
'1', /* V */ |
79 |
|
|
0, /* W */ |
80 |
|
|
'2', /* X */ |
81 |
|
|
0, /* Y */ |
82 |
|
|
'2'}; /* Z */ |
83 |
|
|
#ifdef _DEBUG |
84 |
|
|
/* Debug to console */ |
85 |
|
|
printf("# %15s: %s ", "soundex.c", word); |
86 |
|
|
#endif |
87 |
|
|
|
88 |
|
|
/* Make sure it actually starts with a letter */ |
89 |
|
|
if(!isalpha((int)((unsigned char)word[0]))) return soundXit(); |
90 |
|
|
#ifdef _DEBUG |
91 |
|
|
/* Debug to console */ |
92 |
|
|
printf("isalpha, "); |
93 |
|
|
#endif |
94 |
|
|
|
95 |
|
|
/* Get string length and make sure its at least 3 characters */ |
96 |
|
|
if((n = (int)strlen(word)) < 3) return soundXit(); |
97 |
|
|
#ifdef _DEBUG |
98 |
|
|
/* Debug to console */ |
99 |
|
|
printf("=>3, "); |
100 |
|
|
#endif |
101 |
|
|
|
102 |
|
|
/* Convert chars to lower case and strip non-letter chars */ |
103 |
|
|
j = 0; |
104 |
|
|
for (i = 0; i < n; i++) { |
105 |
|
|
u = tolower((unsigned char)word[i]); |
106 |
|
|
if ((u > 96) && (u < 123)) { |
107 |
|
|
word[j] = u; |
108 |
|
|
j++; |
109 |
|
|
} |
110 |
|
|
} |
111 |
|
|
|
112 |
|
|
/* terminate string */ |
113 |
|
|
word[j] = 0; |
114 |
|
|
|
115 |
|
|
/* String length again */ |
116 |
|
|
n = strlen(word); |
117 |
|
|
|
118 |
|
|
soundCode[0] = word[0]; |
119 |
|
|
|
120 |
|
|
/* remember first char */ |
121 |
|
|
l = soundTable[((word[0]) - 97)]; |
122 |
|
|
|
123 |
|
|
j = 1; |
124 |
|
|
|
125 |
|
|
/* build soundex string */ |
126 |
|
|
for (i = 1; i < n && j < 4; i++) { |
127 |
|
|
u = soundTable[((word[i]) - 97)]; |
128 |
|
|
|
129 |
|
|
if (u != l) { |
130 |
|
|
if (u != 0) { |
131 |
|
|
soundCode[(int) j++] = u; |
132 |
|
|
} |
133 |
|
|
l = u; |
134 |
|
|
} |
135 |
|
|
} |
136 |
|
|
strcpy(word, soundCode); |
137 |
|
|
#ifdef _DEBUG |
138 |
|
|
/* Debug to console */ |
139 |
|
|
printf("-> \"%s\"\n", word); |
140 |
|
|
#endif |
141 |
|
|
|
142 |
|
|
return(1); |
143 |
|
|
} |
144 |
|
|
|
145 |
|
|
int soundXit(void) |
146 |
|
|
{ |
147 |
|
|
#ifdef _DEBUG |
148 |
|
|
printf("was left as is...\n"); |
149 |
|
|
#endif |
150 |
|
|
return(1); |
151 |
|
|
} |