/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/double_metaphone.c
ViewVC logotype

Contents of /mitgcm.org/devel/buildweb/pkg/swish-e/src/double_metaphone.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch: Import, MAIN
CVS Tags: baseline, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
Importing web-site building process.

1 /*
2 $Id: double_metaphone.c,v 1.1 2002/08/20 23:20:21 whmoseley Exp $
3 **
4 **
5 ** August 20, 2002 moseley - first added to swish-e
6 **
7 ** this is a very slightly modified version of the double_metaphone.c code
8 ** from the Perl module Text::DoubleMetaphone by Maurice Aubrey, and based
9 ** on the work of Lawrence Philips.
10 ** See http://aspell.sourceforge.net/metaphone
11 **
12 ** From the Text::DoubleMetaphone README file:
13
14 DESCRIPTION
15
16 This module implements a "sounds like" algorithm developed
17 by Lawrence Philips which he published in the June, 2000 issue
18 of C/C++ Users Journal. Double Metaphone is an improved
19 version of Philips' original Metaphone algorithm.
20
21 COPYRIGHT
22
23 Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
24 All rights reserved.
25
26 This code is based heavily on the C++ implementation by
27 Lawrence Philips and incorporates several bug fixes courtesy
28 of Kevin Atkinson <kevina@users.sourceforge.net>.
29
30 This module is free software; you may redistribute it and/or
31 modify it under the same terms as Perl itself.
32
33
34 **
35 **
36 */
37
38
39
40 #include <stdio.h>
41 #include <ctype.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdarg.h>
45 #include <assert.h>
46 #include "double_metaphone.h"
47 #include "mem.h"
48
49
/*
 * Allocation helpers used by the metastring routines in this file.
 * They forward to emalloc()/erealloc()/efree(), which are presumably
 * declared in "mem.h" -- TODO confirm.
 */

/* Allocate room for n objects of type t and store the (cast) result in v. */
#define META_MALLOC(v,n,t) (v = (t*)emalloc(((n)*sizeof(t))))

/* Resize v to hold n objects of type t; note that v is evaluated twice. */
#define META_REALLOC(v,n,t) (v = (t*)erealloc((v),((n)*sizeof(t))))

/* Release x through efree(). */
#define META_FREE(x) efree((x))
55
56
57 metastring *
58 NewMetaString(char *init_str)
59 {
60 metastring *s;
61 char empty_string[] = "";
62
63 META_MALLOC(s, 1, metastring);
64 assert( s != NULL );
65
66 if (init_str == NULL)
67 init_str = empty_string;
68 s->length = strlen(init_str);
69 /* preallocate a bit more for potential growth */
70 s->bufsize = s->length + 7;
71
72 META_MALLOC(s->str, s->bufsize, char);
73 assert( s->str != NULL );
74
75 strncpy(s->str, init_str, s->length + 1);
76 s->free_string_on_destroy = 1;
77
78 return s;
79 }
80
81
82 void
83 DestroyMetaString(metastring * s)
84 {
85 if (s == NULL)
86 return;
87
88 if (s->free_string_on_destroy && (s->str != NULL))
89 META_FREE(s->str);
90
91 META_FREE(s);
92 }
93
94
95 void
96 IncreaseBuffer(metastring * s, int chars_needed)
97 {
98 META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
99 assert( s->str != NULL );
100 s->bufsize = s->bufsize + chars_needed + 10;
101 }
102
103
104 void
105 MakeUpper(metastring * s)
106 {
107 char *i;
108
109 for (i = s->str; *i; i++)
110 {
111 *i = toupper(*i);
112 }
113 }
114
115
116 int
117 IsVowel(metastring * s, int pos)
118 {
119 char c;
120
121 if ((pos < 0) || (pos >= s->length))
122 return 0;
123
124 c = *(s->str + pos);
125 if ((c == 'A') || (c == 'E') || (c == 'I') || (c =='O') ||
126 (c =='U') || (c == 'Y'))
127 return 1;
128
129 return 0;
130 }
131
132
133 int
134 SlavoGermanic(metastring * s)
135 {
136 if ((char *) strstr(s->str, "W"))
137 return 1;
138 else if ((char *) strstr(s->str, "K"))
139 return 1;
140 else if ((char *) strstr(s->str, "CZ"))
141 return 1;
142 else if ((char *) strstr(s->str, "WITZ"))
143 return 1;
144 else
145 return 0;
146 }
147
148
149 int
150 GetLength(metastring * s)
151 {
152 return s->length;
153 }
154
155
156 char
157 GetAt(metastring * s, int pos)
158 {
159 if ((pos < 0) || (pos >= s->length))
160 return '\0';
161
162 return ((char) *(s->str + pos));
163 }
164
165
166 void
167 SetAt(metastring * s, int pos, char c)
168 {
169 if ((pos < 0) || (pos >= s->length))
170 return;
171
172 *(s->str + pos) = c;
173 }
174
175
176 /*
177 Caveats: the START value is 0 based
178 */
179 int
180 StringAt(metastring * s, int start, int length, ...)
181 {
182 char *test;
183 char *pos;
184 va_list ap;
185
186 if ((start < 0) || (start >= s->length))
187 return 0;
188
189 pos = (s->str + start);
190 va_start(ap, length);
191
192 do
193 {
194 test = va_arg(ap, char *);
195 if (*test && (strncmp(pos, test, length) == 0))
196 return 1;
197 }
198 while (strcmp(test, ""));
199
200 va_end(ap);
201
202 return 0;
203 }
204
205
206 void
207 MetaphAdd(metastring * s, char *new_str)
208 {
209 int add_length;
210
211 if (new_str == NULL)
212 return;
213
214 add_length = strlen(new_str);
215 if ((s->length + add_length) > (s->bufsize - 1))
216 {
217 IncreaseBuffer(s, add_length);
218 }
219
220 strcat(s->str, new_str);
221 s->length += add_length;
222 }
223
224
225 void
226 DoubleMetaphone(char *str, char **codes)
227 {
228 int length;
229 metastring *original;
230 metastring *primary;
231 metastring *secondary;
232 int current;
233 int last;
234
235 current = 0;
236 /* we need the real length and last prior to padding */
237 length = strlen(str);
238 last = length - 1;
239 original = NewMetaString(str);
240 /* Pad original so we can index beyond end */
241 MetaphAdd(original, " ");
242
243 primary = NewMetaString("");
244 secondary = NewMetaString("");
245 primary->free_string_on_destroy = 0;
246 secondary->free_string_on_destroy = 0;
247
248 MakeUpper(original);
249
250 /* skip these when at start of word */
251 if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
252 current += 1;
253
254 /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
255 if (GetAt(original, 0) == 'X')
256 {
257 MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
258 MetaphAdd(secondary, "S");
259 current += 1;
260 }
261
262 /* main loop */
263 while ((primary->length < 4) || (secondary->length < 4))
264 {
265 if (current >= length)
266 break;
267
268 switch (GetAt(original, current))
269 {
270 case 'A':
271 case 'E':
272 case 'I':
273 case 'O':
274 case 'U':
275 case 'Y':
276 if (current == 0)
277 {
278 /* all init vowels now map to 'A' */
279 MetaphAdd(primary, "A");
280 MetaphAdd(secondary, "A");
281 }
282 current += 1;
283 break;
284
285 case 'B':
286
287 /* "-mb", e.g", "dumb", already skipped over... */
288 MetaphAdd(primary, "P");
289 MetaphAdd(secondary, "P");
290
291 if (GetAt(original, current + 1) == 'B')
292 current += 2;
293 else
294 current += 1;
295 break;
296
297 case 'Ç':
298 MetaphAdd(primary, "S");
299 MetaphAdd(secondary, "S");
300 current += 1;
301 break;
302
303 case 'C':
304 /* various germanic */
305 if ((current > 1)
306 && !IsVowel(original, current - 2)
307 && StringAt(original, (current - 1), 3, "ACH", "")
308 && ((GetAt(original, current + 2) != 'I')
309 && ((GetAt(original, current + 2) != 'E')
310 || StringAt(original, (current - 2), 6, "BACHER",
311 "MACHER", ""))))
312 {
313 MetaphAdd(primary, "K");
314 MetaphAdd(secondary, "K");
315 current += 2;
316 break;
317 }
318
319 /* special case 'caesar' */
320 if ((current == 0)
321 && StringAt(original, current, 6, "CAESAR", ""))
322 {
323 MetaphAdd(primary, "S");
324 MetaphAdd(secondary, "S");
325 current += 2;
326 break;
327 }
328
329 /* italian 'chianti' */
330 if (StringAt(original, current, 4, "CHIA", ""))
331 {
332 MetaphAdd(primary, "K");
333 MetaphAdd(secondary, "K");
334 current += 2;
335 break;
336 }
337
338 if (StringAt(original, current, 2, "CH", ""))
339 {
340 /* find 'michael' */
341 if ((current > 0)
342 && StringAt(original, current, 4, "CHAE", ""))
343 {
344 MetaphAdd(primary, "K");
345 MetaphAdd(secondary, "X");
346 current += 2;
347 break;
348 }
349
350 /* greek roots e.g. 'chemistry', 'chorus' */
351 if ((current == 0)
352 && (StringAt(original, (current + 1), 5, "HARAC", "HARIS", "")
353 || StringAt(original, (current + 1), 3, "HOR",
354 "HYM", "HIA", "HEM", ""))
355 && !StringAt(original, 0, 5, "CHORE", ""))
356 {
357 MetaphAdd(primary, "K");
358 MetaphAdd(secondary, "K");
359 current += 2;
360 break;
361 }
362
363 /* germanic, greek, or otherwise 'ch' for 'kh' sound */
364 if (
365 (StringAt(original, 0, 4, "VAN ", "VON ", "")
366 || StringAt(original, 0, 3, "SCH", ""))
367 /* 'architect but not 'arch', 'orchestra', 'orchid' */
368 || StringAt(original, (current - 2), 6, "ORCHES",
369 "ARCHIT", "ORCHID", "")
370 || StringAt(original, (current + 2), 1, "T", "S",
371 "")
372 || ((StringAt(original, (current - 1), 1, "A", "O", "U", "E", "")
373 || (current == 0))
374 /* e.g., 'wachtler', 'wechsler', but not 'tichner' */
375 && StringAt(original, (current + 2), 1, "L", "R",
376 "N", "M", "B", "H", "F", "V", "W", " ", "")))
377 {
378 MetaphAdd(primary, "K");
379 MetaphAdd(secondary, "K");
380 }
381 else
382 {
383 if (current > 0)
384 {
385 if (StringAt(original, 0, 2, "MC", ""))
386 {
387 /* e.g., "McHugh" */
388 MetaphAdd(primary, "K");
389 MetaphAdd(secondary, "K");
390 }
391 else
392 {
393 MetaphAdd(primary, "X");
394 MetaphAdd(secondary, "K");
395 }
396 }
397 else
398 {
399 MetaphAdd(primary, "X");
400 MetaphAdd(secondary, "X");
401 }
402 }
403 current += 2;
404 break;
405 }
406 /* e.g, 'czerny' */
407 if (StringAt(original, current, 2, "CZ", "")
408 && !StringAt(original, (current - 2), 4, "WICZ", ""))
409 {
410 MetaphAdd(primary, "S");
411 MetaphAdd(secondary, "X");
412 current += 2;
413 break;
414 }
415
416 /* e.g., 'focaccia' */
417 if (StringAt(original, (current + 1), 3, "CIA", ""))
418 {
419 MetaphAdd(primary, "X");
420 MetaphAdd(secondary, "X");
421 current += 3;
422 break;
423 }
424
425 /* double 'C', but not if e.g. 'McClellan' */
426 if (StringAt(original, current, 2, "CC", "")
427 && !((current == 1) && (GetAt(original, 0) == 'M')))
428 {
429 /* 'bellocchio' but not 'bacchus' */
430 if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
431 && !StringAt(original, (current + 2), 2, "HU", ""))
432 {
433 /* 'accident', 'accede' 'succeed' */
434 if (
435 ((current == 1)
436 && (GetAt(original, current - 1) == 'A'))
437 || StringAt(original, (current - 1), 5, "UCCEE",
438 "UCCES", ""))
439 {
440 MetaphAdd(primary, "KS");
441 MetaphAdd(secondary, "KS");
442 /* 'bacci', 'bertucci', other italian */
443 }
444 else
445 {
446 MetaphAdd(primary, "X");
447 MetaphAdd(secondary, "X");
448 }
449 current += 3;
450 break;
451 }
452 else
453 { /* Pierce's rule */
454 MetaphAdd(primary, "K");
455 MetaphAdd(secondary, "K");
456 current += 2;
457 break;
458 }
459 }
460
461 if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
462 {
463 MetaphAdd(primary, "K");
464 MetaphAdd(secondary, "K");
465 current += 2;
466 break;
467 }
468
469 if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
470 {
471 /* italian vs. english */
472 if (StringAt
473 (original, current, 3, "CIO", "CIE", "CIA", ""))
474 {
475 MetaphAdd(primary, "S");
476 MetaphAdd(secondary, "X");
477 }
478 else
479 {
480 MetaphAdd(primary, "S");
481 MetaphAdd(secondary, "S");
482 }
483 current += 2;
484 break;
485 }
486
487 /* else */
488 MetaphAdd(primary, "K");
489 MetaphAdd(secondary, "K");
490
491 /* name sent in 'mac caffrey', 'mac gregor */
492 if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
493 current += 3;
494 else
495 if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
496 && !StringAt(original, (current + 1), 2, "CE", "CI", ""))
497 current += 2;
498 else
499 current += 1;
500 break;
501
502 case 'D':
503 if (StringAt(original, current, 2, "DG", ""))
504 {
505 if (StringAt(original, (current + 2), 1, "I", "E", "Y", ""))
506 {
507 /* e.g. 'edge' */
508 MetaphAdd(primary, "J");
509 MetaphAdd(secondary, "J");
510 current += 3;
511 break;
512 }
513 else
514 {
515 /* e.g. 'edgar' */
516 MetaphAdd(primary, "TK");
517 MetaphAdd(secondary, "TK");
518 current += 2;
519 break;
520 }
521 }
522
523 if (StringAt(original, current, 2, "DT", "DD", ""))
524 {
525 MetaphAdd(primary, "T");
526 MetaphAdd(secondary, "T");
527 current += 2;
528 break;
529 }
530
531 /* else */
532 MetaphAdd(primary, "T");
533 MetaphAdd(secondary, "T");
534 current += 1;
535 break;
536
537 case 'F':
538 if (GetAt(original, current + 1) == 'F')
539 current += 2;
540 else
541 current += 1;
542 MetaphAdd(primary, "F");
543 MetaphAdd(secondary, "F");
544 break;
545
546 case 'G':
547 if (GetAt(original, current + 1) == 'H')
548 {
549 if ((current > 0) && !IsVowel(original, current - 1))
550 {
551 MetaphAdd(primary, "K");
552 MetaphAdd(secondary, "K");
553 current += 2;
554 break;
555 }
556
557 if (current < 3)
558 {
559 /* 'ghislane', ghiradelli */
560 if (current == 0)
561 {
562 if (GetAt(original, current + 2) == 'I')
563 {
564 MetaphAdd(primary, "J");
565 MetaphAdd(secondary, "J");
566 }
567 else
568 {
569 MetaphAdd(primary, "K");
570 MetaphAdd(secondary, "K");
571 }
572 current += 2;
573 break;
574 }
575 }
576 /* Parker's rule (with some further refinements) - e.g., 'hugh' */
577 if (
578 ((current > 1)
579 && StringAt(original, (current - 2), 1, "B", "H", "D", ""))
580 /* e.g., 'bough' */
581 || ((current > 2)
582 && StringAt(original, (current - 3), 1, "B", "H", "D", ""))
583 /* e.g., 'broughton' */
584 || ((current > 3)
585 && StringAt(original, (current - 4), 1, "B", "H", "")))
586 {
587 current += 2;
588 break;
589 }
590 else
591 {
592 /* e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' */
593 if ((current > 2)
594 && (GetAt(original, current - 1) == 'U')
595 && StringAt(original, (current - 3), 1, "C",
596 "G", "L", "R", "T", ""))
597 {
598 MetaphAdd(primary, "F");
599 MetaphAdd(secondary, "F");
600 }
601 else if ((current > 0)
602 && GetAt(original, current - 1) != 'I')
603 {
604
605
606 MetaphAdd(primary, "K");
607 MetaphAdd(secondary, "K");
608 }
609
610 current += 2;
611 break;
612 }
613 }
614
615 if (GetAt(original, current + 1) == 'N')
616 {
617 if ((current == 1) && IsVowel(original, 0)
618 && !SlavoGermanic(original))
619 {
620 MetaphAdd(primary, "KN");
621 MetaphAdd(secondary, "N");
622 }
623 else
624 /* not e.g. 'cagney' */
625 if (!StringAt(original, (current + 2), 2, "EY", "")
626 && (GetAt(original, current + 1) != 'Y')
627 && !SlavoGermanic(original))
628 {
629 MetaphAdd(primary, "N");
630 MetaphAdd(secondary, "KN");
631 }
632 else
633 {
634 MetaphAdd(primary, "KN");
635 MetaphAdd(secondary, "KN");
636 }
637 current += 2;
638 break;
639 }
640
641 /* 'tagliaro' */
642 if (StringAt(original, (current + 1), 2, "LI", "")
643 && !SlavoGermanic(original))
644 {
645 MetaphAdd(primary, "KL");
646 MetaphAdd(secondary, "L");
647 current += 2;
648 break;
649 }
650
651 /* -ges-,-gep-,-gel-, -gie- at beginning */
652 if ((current == 0)
653 && ((GetAt(original, current + 1) == 'Y')
654 || StringAt(original, (current + 1), 2, "ES", "EP",
655 "EB", "EL", "EY", "IB", "IL", "IN", "IE",
656 "EI", "ER", "")))
657 {
658 MetaphAdd(primary, "K");
659 MetaphAdd(secondary, "J");
660 current += 2;
661 break;
662 }
663
664 /* -ger-, -gy- */
665 if (
666 (StringAt(original, (current + 1), 2, "ER", "")
667 || (GetAt(original, current + 1) == 'Y'))
668 && !StringAt(original, 0, 6, "DANGER", "RANGER", "MANGER", "")
669 && !StringAt(original, (current - 1), 1, "E", "I", "")
670 && !StringAt(original, (current - 1), 3, "RGY", "OGY",
671 ""))
672 {
673 MetaphAdd(primary, "K");
674 MetaphAdd(secondary, "J");
675 current += 2;
676 break;
677 }
678
679 /* italian e.g, 'biaggi' */
680 if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
681 || StringAt(original, (current - 1), 4, "AGGI", "OGGI", ""))
682 {
683 /* obvious germanic */
684 if (
685 (StringAt(original, 0, 4, "VAN ", "VON ", "")
686 || StringAt(original, 0, 3, "SCH", ""))
687 || StringAt(original, (current + 1), 2, "ET", ""))
688 {
689 MetaphAdd(primary, "K");
690 MetaphAdd(secondary, "K");
691 }
692 else
693 {
694 /* always soft if french ending */
695 if (StringAt
696 (original, (current + 1), 4, "IER ", ""))
697 {
698 MetaphAdd(primary, "J");
699 MetaphAdd(secondary, "J");
700 }
701 else
702 {
703 MetaphAdd(primary, "J");
704 MetaphAdd(secondary, "K");
705 }
706 }
707 current += 2;
708 break;
709 }
710
711 if (GetAt(original, current + 1) == 'G')
712 current += 2;
713 else
714 current += 1;
715 MetaphAdd(primary, "K");
716 MetaphAdd(secondary, "K");
717 break;
718
719 case 'H':
720 /* only keep if first & before vowel or btw. 2 vowels */
721 if (((current == 0) || IsVowel(original, current - 1))
722 && IsVowel(original, current + 1))
723 {
724 MetaphAdd(primary, "H");
725 MetaphAdd(secondary, "H");
726 current += 2;
727 }
728 else /* also takes care of 'HH' */
729 current += 1;
730 break;
731
732 case 'J':
733 /* obvious spanish, 'jose', 'san jacinto' */
734 if (StringAt(original, current, 4, "JOSE", "")
735 || StringAt(original, 0, 4, "SAN ", ""))
736 {
737 if (((current == 0)
738 && (GetAt(original, current + 4) == ' '))
739 || StringAt(original, 0, 4, "SAN ", ""))
740 {
741 MetaphAdd(primary, "H");
742 MetaphAdd(secondary, "H");
743 }
744 else
745 {
746 MetaphAdd(primary, "J");
747 MetaphAdd(secondary, "H");
748 }
749 current += 1;
750 break;
751 }
752
753 if ((current == 0)
754 && !StringAt(original, current, 4, "JOSE", ""))
755 {
756 MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */
757 MetaphAdd(secondary, "A");
758 }
759 else
760 {
761 /* spanish pron. of e.g. 'bajador' */
762 if (IsVowel(original, current - 1)
763 && !SlavoGermanic(original)
764 && ((GetAt(original, current + 1) == 'A')
765 || (GetAt(original, current + 1) == 'O')))
766 {
767 MetaphAdd(primary, "J");
768 MetaphAdd(secondary, "H");
769 }
770 else
771 {
772 if (current == last)
773 {
774 MetaphAdd(primary, "J");
775 MetaphAdd(secondary, "");
776 }
777 else
778 {
779 if (!StringAt(original, (current + 1), 1, "L", "T",
780 "K", "S", "N", "M", "B", "Z", "")
781 && !StringAt(original, (current - 1), 1,
782 "S", "K", "L", ""))
783 {
784 MetaphAdd(primary, "J");
785 MetaphAdd(secondary, "J");
786 }
787 }
788 }
789 }
790
791 if (GetAt(original, current + 1) == 'J') /* it could happen! */
792 current += 2;
793 else
794 current += 1;
795 break;
796
797 case 'K':
798 if (GetAt(original, current + 1) == 'K')
799 current += 2;
800 else
801 current += 1;
802 MetaphAdd(primary, "K");
803 MetaphAdd(secondary, "K");
804 break;
805
806 case 'L':
807 if (GetAt(original, current + 1) == 'L')
808 {
809 /* spanish e.g. 'cabrillo', 'gallegos' */
810 if (((current == (length - 3))
811 && StringAt(original, (current - 1), 4, "ILLO",
812 "ILLA", "ALLE", ""))
813 || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
814 || StringAt(original, last, 1, "A", "O", ""))
815 && StringAt(original, (current - 1), 4, "ALLE", "")))
816 {
817 MetaphAdd(primary, "L");
818 MetaphAdd(secondary, "");
819 current += 2;
820 break;
821 }
822 current += 2;
823 }
824 else
825 current += 1;
826 MetaphAdd(primary, "L");
827 MetaphAdd(secondary, "L");
828 break;
829
830 case 'M':
831 if ((StringAt(original, (current - 1), 3, "UMB", "")
832 && (((current + 1) == last)
833 || StringAt(original, (current + 2), 2, "ER", "")))
834 /* 'dumb','thumb' */
835 || (GetAt(original, current + 1) == 'M'))
836 current += 2;
837 else
838 current += 1;
839 MetaphAdd(primary, "M");
840 MetaphAdd(secondary, "M");
841 break;
842
843 case 'N':
844 if (GetAt(original, current + 1) == 'N')
845 current += 2;
846 else
847 current += 1;
848 MetaphAdd(primary, "N");
849 MetaphAdd(secondary, "N");
850 break;
851
852 case 'Ñ':
853 current += 1;
854 MetaphAdd(primary, "N");
855 MetaphAdd(secondary, "N");
856 break;
857
858 case 'P':
859 if (GetAt(original, current + 1) == 'H')
860 {
861 MetaphAdd(primary, "F");
862 MetaphAdd(secondary, "F");
863 current += 2;
864 break;
865 }
866
867 /* also account for "campbell", "raspberry" */
868 if (StringAt(original, (current + 1), 1, "P", "B", ""))
869 current += 2;
870 else
871 current += 1;
872 MetaphAdd(primary, "P");
873 MetaphAdd(secondary, "P");
874 break;
875
876 case 'Q':
877 if (GetAt(original, current + 1) == 'Q')
878 current += 2;
879 else
880 current += 1;
881 MetaphAdd(primary, "K");
882 MetaphAdd(secondary, "K");
883 break;
884
885 case 'R':
886 /* french e.g. 'rogier', but exclude 'hochmeier' */
887 if ((current == last)
888 && !SlavoGermanic(original)
889 && StringAt(original, (current - 2), 2, "IE", "")
890 && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
891 {
892 MetaphAdd(primary, "");
893 MetaphAdd(secondary, "R");
894 }
895 else
896 {
897 MetaphAdd(primary, "R");
898 MetaphAdd(secondary, "R");
899 }
900
901 if (GetAt(original, current + 1) == 'R')
902 current += 2;
903 else
904 current += 1;
905 break;
906
907 case 'S':
908 /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
909 if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
910 {
911 current += 1;
912 break;
913 }
914
915 /* special case 'sugar-' */
916 if ((current == 0)
917 && StringAt(original, current, 5, "SUGAR", ""))
918 {
919 MetaphAdd(primary, "X");
920 MetaphAdd(secondary, "S");
921 current += 1;
922 break;
923 }
924
925 if (StringAt(original, current, 2, "SH", ""))
926 {
927 /* germanic */
928 if (StringAt
929 (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
930 "HOLZ", ""))
931 {
932 MetaphAdd(primary, "S");
933 MetaphAdd(secondary, "S");
934 }
935 else
936 {
937 MetaphAdd(primary, "X");
938 MetaphAdd(secondary, "X");
939 }
940 current += 2;
941 break;
942 }
943
944 /* italian & armenian */
945 if (StringAt(original, current, 3, "SIO", "SIA", "")
946 || StringAt(original, current, 4, "SIAN", ""))
947 {
948 if (!SlavoGermanic(original))
949 {
950 MetaphAdd(primary, "S");
951 MetaphAdd(secondary, "X");
952 }
953 else
954 {
955 MetaphAdd(primary, "S");
956 MetaphAdd(secondary, "S");
957 }
958 current += 3;
959 break;
960 }
961
962 /* german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
963 also, -sz- in slavic language altho in hungarian it is pronounced 's' */
964 if (((current == 0)
965 && StringAt(original, (current + 1), 1, "M", "N", "L", "W", ""))
966 || StringAt(original, (current + 1), 1, "Z", ""))
967 {
968 MetaphAdd(primary, "S");
969 MetaphAdd(secondary, "X");
970 if (StringAt(original, (current + 1), 1, "Z", ""))
971 current += 2;
972 else
973 current += 1;
974 break;
975 }
976
977 if (StringAt(original, current, 2, "SC", ""))
978 {
979 /* Schlesinger's rule */
980 if (GetAt(original, current + 2) == 'H')
981 {
982 /* dutch origin, e.g. 'school', 'schooner' */
983 if (StringAt(original, (current + 3), 2, "OO", "ER", "EN",
984 "UY", "ED", "EM", ""))
985 {
986 /* 'schermerhorn', 'schenker' */
987 if (StringAt(original, (current + 3), 2, "ER", "EN", ""))
988 {
989 MetaphAdd(primary, "X");
990 MetaphAdd(secondary, "SK");
991 }
992 else
993 {
994 MetaphAdd(primary, "SK");
995 MetaphAdd(secondary, "SK");
996 }
997 current += 3;
998 break;
999 }
1000 else
1001 {
1002 if ((current == 0) && !IsVowel(original, 3)
1003 && (GetAt(original, 3) != 'W'))
1004 {
1005 MetaphAdd(primary, "X");
1006 MetaphAdd(secondary, "S");
1007 }
1008 else
1009 {
1010 MetaphAdd(primary, "X");
1011 MetaphAdd(secondary, "X");
1012 }
1013 current += 3;
1014 break;
1015 }
1016
1017 if (StringAt(original, (current + 2), 1, "I", "E", "Y", ""))
1018 {
1019 MetaphAdd(primary, "S");
1020 MetaphAdd(secondary, "S");
1021 current += 3;
1022 break;
1023 }
1024 /* else */
1025 MetaphAdd(primary, "SK");
1026 MetaphAdd(secondary, "SK");
1027 current += 3;
1028 break;
1029 }
1030 }
1031
1032 /* french e.g. 'resnais', 'artois' */
1033 if ((current == last)
1034 && StringAt(original, (current - 2), 2, "AI", "OI", ""))
1035 {
1036 MetaphAdd(primary, "");
1037 MetaphAdd(secondary, "S");
1038 }
1039 else
1040 {
1041 MetaphAdd(primary, "S");
1042 MetaphAdd(secondary, "S");
1043 }
1044
1045 if (StringAt(original, (current + 1), 1, "S", "Z", ""))
1046 current += 2;
1047 else
1048 current += 1;
1049 break;
1050
1051 case 'T':
1052 if (StringAt(original, current, 4, "TION", ""))
1053 {
1054 MetaphAdd(primary, "X");
1055 MetaphAdd(secondary, "X");
1056 current += 3;
1057 break;
1058 }
1059
1060 if (StringAt(original, current, 3, "TIA", "TCH", ""))
1061 {
1062 MetaphAdd(primary, "X");
1063 MetaphAdd(secondary, "X");
1064 current += 3;
1065 break;
1066 }
1067
1068 if (StringAt(original, current, 2, "TH", "")
1069 || StringAt(original, current, 3, "TTH", ""))
1070 {
1071 /* special case 'thomas', 'thames' or germanic */
1072 if (StringAt(original, (current + 2), 2, "OM", "AM", "")
1073 || StringAt(original, 0, 4, "VAN ", "VON ", "")
1074 || StringAt(original, 0, 3, "SCH", ""))
1075 {
1076 MetaphAdd(primary, "T");
1077 MetaphAdd(secondary, "T");
1078 }
1079 else
1080 {
1081 MetaphAdd(primary, "0");
1082 MetaphAdd(secondary, "T");
1083 }
1084 current += 2;
1085 break;
1086 }
1087
1088 if (StringAt(original, (current + 1), 1, "T", "D", ""))
1089 current += 2;
1090 else
1091 current += 1;
1092 MetaphAdd(primary, "T");
1093 MetaphAdd(secondary, "T");
1094 break;
1095
1096 case 'V':
1097 if (GetAt(original, current + 1) == 'V')
1098 current += 2;
1099 else
1100 current += 1;
1101 MetaphAdd(primary, "F");
1102 MetaphAdd(secondary, "F");
1103 break;
1104
1105 case 'W':
1106 /* can also be in middle of word */
1107 if (StringAt(original, current, 2, "WR", ""))
1108 {
1109 MetaphAdd(primary, "R");
1110 MetaphAdd(secondary, "R");
1111 current += 2;
1112 break;
1113 }
1114
1115 if ((current == 0)
1116 && (IsVowel(original, current + 1)
1117 || StringAt(original, current, 2, "WH", "")))
1118 {
1119 /* Wasserman should match Vasserman */
1120 if (IsVowel(original, current + 1))
1121 {
1122 MetaphAdd(primary, "A");
1123 MetaphAdd(secondary, "F");
1124 }
1125 else
1126 {
1127 /* need Uomo to match Womo */
1128 MetaphAdd(primary, "A");
1129 MetaphAdd(secondary, "A");
1130 }
1131 }
1132
1133 /* Arnow should match Arnoff */
1134 if (((current == last) && IsVowel(original, current - 1))
1135 || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
1136 "OWSKI", "OWSKY", "")
1137 || StringAt(original, 0, 3, "SCH", ""))
1138 {
1139 MetaphAdd(primary, "");
1140 MetaphAdd(secondary, "F");
1141 current += 1;
1142 break;
1143 }
1144
1145 /* polish e.g. 'filipowicz' */
1146 if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
1147 {
1148 MetaphAdd(primary, "TS");
1149 MetaphAdd(secondary, "FX");
1150 current += 4;
1151 break;
1152 }
1153
1154 /* else skip it */
1155 current += 1;
1156 break;
1157
1158 case 'X':
1159 /* french e.g. breaux */
1160 if (!((current == last)
1161 && (StringAt(original, (current - 3), 3, "IAU", "EAU", "")
1162 || StringAt(original, (current - 2), 2, "AU", "OU", ""))))
1163 {
1164 MetaphAdd(primary, "KS");
1165 MetaphAdd(secondary, "KS");
1166 }
1167
1168
1169 if (StringAt(original, (current + 1), 1, "C", "X", ""))
1170 current += 2;
1171 else
1172 current += 1;
1173 break;
1174
1175 case 'Z':
1176 /* chinese pinyin e.g. 'zhao' */
1177 if (GetAt(original, current + 1) == 'H')
1178 {
1179 MetaphAdd(primary, "J");
1180 MetaphAdd(secondary, "J");
1181 current += 2;
1182 break;
1183 }
1184 else if (StringAt(original, (current + 1), 2, "ZO", "ZI", "ZA", "")
1185 || (SlavoGermanic(original)
1186 && ((current > 0)
1187 && GetAt(original, current - 1) != 'T')))
1188 {
1189 MetaphAdd(primary, "S");
1190 MetaphAdd(secondary, "TS");
1191 }
1192 else
1193 {
1194 MetaphAdd(primary, "S");
1195 MetaphAdd(secondary, "S");
1196 }
1197
1198 if (GetAt(original, current + 1) == 'Z')
1199 current += 2;
1200 else
1201 current += 1;
1202 break;
1203
1204 default:
1205 current += 1;
1206 }
1207 /* printf("PRIMARY: %s\n", primary->str);
1208 printf("SECONDARY: %s\n", secondary->str); */
1209 }
1210
1211
1212 if (primary->length > 4)
1213 SetAt(primary, 4, '\0');
1214
1215 if (secondary->length > 4)
1216 SetAt(secondary, 4, '\0');
1217
1218 *codes = primary->str;
1219 *++codes = secondary->str;
1220
1221 DestroyMetaString(original);
1222 DestroyMetaString(primary);
1223 DestroyMetaString(secondary);
1224 }
1225

  ViewVC Help
Powered by ViewVC 1.1.22