/[MITgcm]/mitgcm.org/devel/buildweb/pkg/swish-e/src/double_metaphone.c
ViewVC logotype

Annotation of /mitgcm.org/devel/buildweb/pkg/swish-e/src/double_metaphone.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.1 - (hide annotations) (download)
Fri Sep 20 19:47:29 2002 UTC (22 years, 10 months ago) by adcroft
Branch point for: Import, MAIN
File MIME type: text/plain
Initial revision

1 adcroft 1.1 /*
2     $Id: double_metaphone.c,v 1.1 2002/08/20 23:20:21 whmoseley Exp $
3     **
4     **
5     ** August 20, 2002 moseley - first added to swish-e
6     **
7     ** this is a very slightly modified version of the double_metaphone.c code
8     ** from the Perl module Text::DoubleMetaphone by Maurice Aubrey, and based
9     ** on the work of Lawrence Philips.
10     ** See http://aspell.sourceforge.net/metaphone
11     **
12     ** From the Text::DoubleMetaphone README file:
13    
14     DESCRIPTION
15    
16     This module implements a "sounds like" algorithm developed
17     by Lawrence Philips which he published in the June, 2000 issue
18     of C/C++ Users Journal. Double Metaphone is an improved
19     version of Philips' original Metaphone algorithm.
20    
21     COPYRIGHT
22    
23     Copyright 2000, Maurice Aubrey <maurice@hevanet.com>.
24     All rights reserved.
25    
26     This code is based heavily on the C++ implementation by
27     Lawrence Philips and incorporates several bug fixes courtesy
28     of Kevin Atkinson <kevina@users.sourceforge.net>.
29    
30     This module is free software; you may redistribute it and/or
31     modify it under the same terms as Perl itself.
32    
33    
34     **
35     **
36     */
37    
38    
39    
40     #include <stdio.h>
41     #include <ctype.h>
42     #include <stdlib.h>
43     #include <string.h>
44     #include <stdarg.h>
45     #include <assert.h>
46     #include "double_metaphone.h"
47     #include "mem.h"
48    
49    
/*
 * Allocation helpers built on emalloc()/erealloc()/efree() (not standard
 * library routines; presumably declared in "mem.h" -- confirm).
 *   v - lvalue that receives the resulting pointer
 *   n - number of elements
 *   t - element type
 */
#define META_MALLOC(v,n,t)  ((v) = (t *) emalloc((n) * sizeof(t)))

#define META_REALLOC(v,n,t) ((v) = (t *) erealloc((v), (n) * sizeof(t)))

#define META_FREE(x)        efree((x))
55    
56    
57     metastring *
58     NewMetaString(char *init_str)
59     {
60     metastring *s;
61     char empty_string[] = "";
62    
63     META_MALLOC(s, 1, metastring);
64     assert( s != NULL );
65    
66     if (init_str == NULL)
67     init_str = empty_string;
68     s->length = strlen(init_str);
69     /* preallocate a bit more for potential growth */
70     s->bufsize = s->length + 7;
71    
72     META_MALLOC(s->str, s->bufsize, char);
73     assert( s->str != NULL );
74    
75     strncpy(s->str, init_str, s->length + 1);
76     s->free_string_on_destroy = 1;
77    
78     return s;
79     }
80    
81    
82     void
83     DestroyMetaString(metastring * s)
84     {
85     if (s == NULL)
86     return;
87    
88     if (s->free_string_on_destroy && (s->str != NULL))
89     META_FREE(s->str);
90    
91     META_FREE(s);
92     }
93    
94    
95     void
96     IncreaseBuffer(metastring * s, int chars_needed)
97     {
98     META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char);
99     assert( s->str != NULL );
100     s->bufsize = s->bufsize + chars_needed + 10;
101     }
102    
103    
104     void
105     MakeUpper(metastring * s)
106     {
107     char *i;
108    
109     for (i = s->str; *i; i++)
110     {
111     *i = toupper(*i);
112     }
113     }
114    
115    
116     int
117     IsVowel(metastring * s, int pos)
118     {
119     char c;
120    
121     if ((pos < 0) || (pos >= s->length))
122     return 0;
123    
124     c = *(s->str + pos);
125     if ((c == 'A') || (c == 'E') || (c == 'I') || (c =='O') ||
126     (c =='U') || (c == 'Y'))
127     return 1;
128    
129     return 0;
130     }
131    
132    
133     int
134     SlavoGermanic(metastring * s)
135     {
136     if ((char *) strstr(s->str, "W"))
137     return 1;
138     else if ((char *) strstr(s->str, "K"))
139     return 1;
140     else if ((char *) strstr(s->str, "CZ"))
141     return 1;
142     else if ((char *) strstr(s->str, "WITZ"))
143     return 1;
144     else
145     return 0;
146     }
147    
148    
149     int
150     GetLength(metastring * s)
151     {
152     return s->length;
153     }
154    
155    
156     char
157     GetAt(metastring * s, int pos)
158     {
159     if ((pos < 0) || (pos >= s->length))
160     return '\0';
161    
162     return ((char) *(s->str + pos));
163     }
164    
165    
166     void
167     SetAt(metastring * s, int pos, char c)
168     {
169     if ((pos < 0) || (pos >= s->length))
170     return;
171    
172     *(s->str + pos) = c;
173     }
174    
175    
176     /*
177     Caveats: the START value is 0 based
178     */
179     int
180     StringAt(metastring * s, int start, int length, ...)
181     {
182     char *test;
183     char *pos;
184     va_list ap;
185    
186     if ((start < 0) || (start >= s->length))
187     return 0;
188    
189     pos = (s->str + start);
190     va_start(ap, length);
191    
192     do
193     {
194     test = va_arg(ap, char *);
195     if (*test && (strncmp(pos, test, length) == 0))
196     return 1;
197     }
198     while (strcmp(test, ""));
199    
200     va_end(ap);
201    
202     return 0;
203     }
204    
205    
206     void
207     MetaphAdd(metastring * s, char *new_str)
208     {
209     int add_length;
210    
211     if (new_str == NULL)
212     return;
213    
214     add_length = strlen(new_str);
215     if ((s->length + add_length) > (s->bufsize - 1))
216     {
217     IncreaseBuffer(s, add_length);
218     }
219    
220     strcat(s->str, new_str);
221     s->length += add_length;
222     }
223    
224    
225     void
226     DoubleMetaphone(char *str, char **codes)
227     {
228     int length;
229     metastring *original;
230     metastring *primary;
231     metastring *secondary;
232     int current;
233     int last;
234    
235     current = 0;
236     /* we need the real length and last prior to padding */
237     length = strlen(str);
238     last = length - 1;
239     original = NewMetaString(str);
240     /* Pad original so we can index beyond end */
241     MetaphAdd(original, " ");
242    
243     primary = NewMetaString("");
244     secondary = NewMetaString("");
245     primary->free_string_on_destroy = 0;
246     secondary->free_string_on_destroy = 0;
247    
248     MakeUpper(original);
249    
250     /* skip these when at start of word */
251     if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", ""))
252     current += 1;
253    
254     /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */
255     if (GetAt(original, 0) == 'X')
256     {
257     MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */
258     MetaphAdd(secondary, "S");
259     current += 1;
260     }
261    
262     /* main loop */
263     while ((primary->length < 4) || (secondary->length < 4))
264     {
265     if (current >= length)
266     break;
267    
268     switch (GetAt(original, current))
269     {
270     case 'A':
271     case 'E':
272     case 'I':
273     case 'O':
274     case 'U':
275     case 'Y':
276     if (current == 0)
277     {
278     /* all init vowels now map to 'A' */
279     MetaphAdd(primary, "A");
280     MetaphAdd(secondary, "A");
281     }
282     current += 1;
283     break;
284    
285     case 'B':
286    
287     /* "-mb", e.g", "dumb", already skipped over... */
288     MetaphAdd(primary, "P");
289     MetaphAdd(secondary, "P");
290    
291     if (GetAt(original, current + 1) == 'B')
292     current += 2;
293     else
294     current += 1;
295     break;
296    
297     case 'Ç':
298     MetaphAdd(primary, "S");
299     MetaphAdd(secondary, "S");
300     current += 1;
301     break;
302    
303     case 'C':
304     /* various germanic */
305     if ((current > 1)
306     && !IsVowel(original, current - 2)
307     && StringAt(original, (current - 1), 3, "ACH", "")
308     && ((GetAt(original, current + 2) != 'I')
309     && ((GetAt(original, current + 2) != 'E')
310     || StringAt(original, (current - 2), 6, "BACHER",
311     "MACHER", ""))))
312     {
313     MetaphAdd(primary, "K");
314     MetaphAdd(secondary, "K");
315     current += 2;
316     break;
317     }
318    
319     /* special case 'caesar' */
320     if ((current == 0)
321     && StringAt(original, current, 6, "CAESAR", ""))
322     {
323     MetaphAdd(primary, "S");
324     MetaphAdd(secondary, "S");
325     current += 2;
326     break;
327     }
328    
329     /* italian 'chianti' */
330     if (StringAt(original, current, 4, "CHIA", ""))
331     {
332     MetaphAdd(primary, "K");
333     MetaphAdd(secondary, "K");
334     current += 2;
335     break;
336     }
337    
338     if (StringAt(original, current, 2, "CH", ""))
339     {
340     /* find 'michael' */
341     if ((current > 0)
342     && StringAt(original, current, 4, "CHAE", ""))
343     {
344     MetaphAdd(primary, "K");
345     MetaphAdd(secondary, "X");
346     current += 2;
347     break;
348     }
349    
350     /* greek roots e.g. 'chemistry', 'chorus' */
351     if ((current == 0)
352     && (StringAt(original, (current + 1), 5, "HARAC", "HARIS", "")
353     || StringAt(original, (current + 1), 3, "HOR",
354     "HYM", "HIA", "HEM", ""))
355     && !StringAt(original, 0, 5, "CHORE", ""))
356     {
357     MetaphAdd(primary, "K");
358     MetaphAdd(secondary, "K");
359     current += 2;
360     break;
361     }
362    
363     /* germanic, greek, or otherwise 'ch' for 'kh' sound */
364     if (
365     (StringAt(original, 0, 4, "VAN ", "VON ", "")
366     || StringAt(original, 0, 3, "SCH", ""))
367     /* 'architect but not 'arch', 'orchestra', 'orchid' */
368     || StringAt(original, (current - 2), 6, "ORCHES",
369     "ARCHIT", "ORCHID", "")
370     || StringAt(original, (current + 2), 1, "T", "S",
371     "")
372     || ((StringAt(original, (current - 1), 1, "A", "O", "U", "E", "")
373     || (current == 0))
374     /* e.g., 'wachtler', 'wechsler', but not 'tichner' */
375     && StringAt(original, (current + 2), 1, "L", "R",
376     "N", "M", "B", "H", "F", "V", "W", " ", "")))
377     {
378     MetaphAdd(primary, "K");
379     MetaphAdd(secondary, "K");
380     }
381     else
382     {
383     if (current > 0)
384     {
385     if (StringAt(original, 0, 2, "MC", ""))
386     {
387     /* e.g., "McHugh" */
388     MetaphAdd(primary, "K");
389     MetaphAdd(secondary, "K");
390     }
391     else
392     {
393     MetaphAdd(primary, "X");
394     MetaphAdd(secondary, "K");
395     }
396     }
397     else
398     {
399     MetaphAdd(primary, "X");
400     MetaphAdd(secondary, "X");
401     }
402     }
403     current += 2;
404     break;
405     }
406     /* e.g, 'czerny' */
407     if (StringAt(original, current, 2, "CZ", "")
408     && !StringAt(original, (current - 2), 4, "WICZ", ""))
409     {
410     MetaphAdd(primary, "S");
411     MetaphAdd(secondary, "X");
412     current += 2;
413     break;
414     }
415    
416     /* e.g., 'focaccia' */
417     if (StringAt(original, (current + 1), 3, "CIA", ""))
418     {
419     MetaphAdd(primary, "X");
420     MetaphAdd(secondary, "X");
421     current += 3;
422     break;
423     }
424    
425     /* double 'C', but not if e.g. 'McClellan' */
426     if (StringAt(original, current, 2, "CC", "")
427     && !((current == 1) && (GetAt(original, 0) == 'M')))
428     {
429     /* 'bellocchio' but not 'bacchus' */
430     if (StringAt(original, (current + 2), 1, "I", "E", "H", "")
431     && !StringAt(original, (current + 2), 2, "HU", ""))
432     {
433     /* 'accident', 'accede' 'succeed' */
434     if (
435     ((current == 1)
436     && (GetAt(original, current - 1) == 'A'))
437     || StringAt(original, (current - 1), 5, "UCCEE",
438     "UCCES", ""))
439     {
440     MetaphAdd(primary, "KS");
441     MetaphAdd(secondary, "KS");
442     /* 'bacci', 'bertucci', other italian */
443     }
444     else
445     {
446     MetaphAdd(primary, "X");
447     MetaphAdd(secondary, "X");
448     }
449     current += 3;
450     break;
451     }
452     else
453     { /* Pierce's rule */
454     MetaphAdd(primary, "K");
455     MetaphAdd(secondary, "K");
456     current += 2;
457     break;
458     }
459     }
460    
461     if (StringAt(original, current, 2, "CK", "CG", "CQ", ""))
462     {
463     MetaphAdd(primary, "K");
464     MetaphAdd(secondary, "K");
465     current += 2;
466     break;
467     }
468    
469     if (StringAt(original, current, 2, "CI", "CE", "CY", ""))
470     {
471     /* italian vs. english */
472     if (StringAt
473     (original, current, 3, "CIO", "CIE", "CIA", ""))
474     {
475     MetaphAdd(primary, "S");
476     MetaphAdd(secondary, "X");
477     }
478     else
479     {
480     MetaphAdd(primary, "S");
481     MetaphAdd(secondary, "S");
482     }
483     current += 2;
484     break;
485     }
486    
487     /* else */
488     MetaphAdd(primary, "K");
489     MetaphAdd(secondary, "K");
490    
491     /* name sent in 'mac caffrey', 'mac gregor */
492     if (StringAt(original, (current + 1), 2, " C", " Q", " G", ""))
493     current += 3;
494     else
495     if (StringAt(original, (current + 1), 1, "C", "K", "Q", "")
496     && !StringAt(original, (current + 1), 2, "CE", "CI", ""))
497     current += 2;
498     else
499     current += 1;
500     break;
501    
502     case 'D':
503     if (StringAt(original, current, 2, "DG", ""))
504     {
505     if (StringAt(original, (current + 2), 1, "I", "E", "Y", ""))
506     {
507     /* e.g. 'edge' */
508     MetaphAdd(primary, "J");
509     MetaphAdd(secondary, "J");
510     current += 3;
511     break;
512     }
513     else
514     {
515     /* e.g. 'edgar' */
516     MetaphAdd(primary, "TK");
517     MetaphAdd(secondary, "TK");
518     current += 2;
519     break;
520     }
521     }
522    
523     if (StringAt(original, current, 2, "DT", "DD", ""))
524     {
525     MetaphAdd(primary, "T");
526     MetaphAdd(secondary, "T");
527     current += 2;
528     break;
529     }
530    
531     /* else */
532     MetaphAdd(primary, "T");
533     MetaphAdd(secondary, "T");
534     current += 1;
535     break;
536    
537     case 'F':
538     if (GetAt(original, current + 1) == 'F')
539     current += 2;
540     else
541     current += 1;
542     MetaphAdd(primary, "F");
543     MetaphAdd(secondary, "F");
544     break;
545    
546     case 'G':
547     if (GetAt(original, current + 1) == 'H')
548     {
549     if ((current > 0) && !IsVowel(original, current - 1))
550     {
551     MetaphAdd(primary, "K");
552     MetaphAdd(secondary, "K");
553     current += 2;
554     break;
555     }
556    
557     if (current < 3)
558     {
559     /* 'ghislane', ghiradelli */
560     if (current == 0)
561     {
562     if (GetAt(original, current + 2) == 'I')
563     {
564     MetaphAdd(primary, "J");
565     MetaphAdd(secondary, "J");
566     }
567     else
568     {
569     MetaphAdd(primary, "K");
570     MetaphAdd(secondary, "K");
571     }
572     current += 2;
573     break;
574     }
575     }
576     /* Parker's rule (with some further refinements) - e.g., 'hugh' */
577     if (
578     ((current > 1)
579     && StringAt(original, (current - 2), 1, "B", "H", "D", ""))
580     /* e.g., 'bough' */
581     || ((current > 2)
582     && StringAt(original, (current - 3), 1, "B", "H", "D", ""))
583     /* e.g., 'broughton' */
584     || ((current > 3)
585     && StringAt(original, (current - 4), 1, "B", "H", "")))
586     {
587     current += 2;
588     break;
589     }
590     else
591     {
592     /* e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' */
593     if ((current > 2)
594     && (GetAt(original, current - 1) == 'U')
595     && StringAt(original, (current - 3), 1, "C",
596     "G", "L", "R", "T", ""))
597     {
598     MetaphAdd(primary, "F");
599     MetaphAdd(secondary, "F");
600     }
601     else if ((current > 0)
602     && GetAt(original, current - 1) != 'I')
603     {
604    
605    
606     MetaphAdd(primary, "K");
607     MetaphAdd(secondary, "K");
608     }
609    
610     current += 2;
611     break;
612     }
613     }
614    
615     if (GetAt(original, current + 1) == 'N')
616     {
617     if ((current == 1) && IsVowel(original, 0)
618     && !SlavoGermanic(original))
619     {
620     MetaphAdd(primary, "KN");
621     MetaphAdd(secondary, "N");
622     }
623     else
624     /* not e.g. 'cagney' */
625     if (!StringAt(original, (current + 2), 2, "EY", "")
626     && (GetAt(original, current + 1) != 'Y')
627     && !SlavoGermanic(original))
628     {
629     MetaphAdd(primary, "N");
630     MetaphAdd(secondary, "KN");
631     }
632     else
633     {
634     MetaphAdd(primary, "KN");
635     MetaphAdd(secondary, "KN");
636     }
637     current += 2;
638     break;
639     }
640    
641     /* 'tagliaro' */
642     if (StringAt(original, (current + 1), 2, "LI", "")
643     && !SlavoGermanic(original))
644     {
645     MetaphAdd(primary, "KL");
646     MetaphAdd(secondary, "L");
647     current += 2;
648     break;
649     }
650    
651     /* -ges-,-gep-,-gel-, -gie- at beginning */
652     if ((current == 0)
653     && ((GetAt(original, current + 1) == 'Y')
654     || StringAt(original, (current + 1), 2, "ES", "EP",
655     "EB", "EL", "EY", "IB", "IL", "IN", "IE",
656     "EI", "ER", "")))
657     {
658     MetaphAdd(primary, "K");
659     MetaphAdd(secondary, "J");
660     current += 2;
661     break;
662     }
663    
664     /* -ger-, -gy- */
665     if (
666     (StringAt(original, (current + 1), 2, "ER", "")
667     || (GetAt(original, current + 1) == 'Y'))
668     && !StringAt(original, 0, 6, "DANGER", "RANGER", "MANGER", "")
669     && !StringAt(original, (current - 1), 1, "E", "I", "")
670     && !StringAt(original, (current - 1), 3, "RGY", "OGY",
671     ""))
672     {
673     MetaphAdd(primary, "K");
674     MetaphAdd(secondary, "J");
675     current += 2;
676     break;
677     }
678    
679     /* italian e.g, 'biaggi' */
680     if (StringAt(original, (current + 1), 1, "E", "I", "Y", "")
681     || StringAt(original, (current - 1), 4, "AGGI", "OGGI", ""))
682     {
683     /* obvious germanic */
684     if (
685     (StringAt(original, 0, 4, "VAN ", "VON ", "")
686     || StringAt(original, 0, 3, "SCH", ""))
687     || StringAt(original, (current + 1), 2, "ET", ""))
688     {
689     MetaphAdd(primary, "K");
690     MetaphAdd(secondary, "K");
691     }
692     else
693     {
694     /* always soft if french ending */
695     if (StringAt
696     (original, (current + 1), 4, "IER ", ""))
697     {
698     MetaphAdd(primary, "J");
699     MetaphAdd(secondary, "J");
700     }
701     else
702     {
703     MetaphAdd(primary, "J");
704     MetaphAdd(secondary, "K");
705     }
706     }
707     current += 2;
708     break;
709     }
710    
711     if (GetAt(original, current + 1) == 'G')
712     current += 2;
713     else
714     current += 1;
715     MetaphAdd(primary, "K");
716     MetaphAdd(secondary, "K");
717     break;
718    
719     case 'H':
720     /* only keep if first & before vowel or btw. 2 vowels */
721     if (((current == 0) || IsVowel(original, current - 1))
722     && IsVowel(original, current + 1))
723     {
724     MetaphAdd(primary, "H");
725     MetaphAdd(secondary, "H");
726     current += 2;
727     }
728     else /* also takes care of 'HH' */
729     current += 1;
730     break;
731    
732     case 'J':
733     /* obvious spanish, 'jose', 'san jacinto' */
734     if (StringAt(original, current, 4, "JOSE", "")
735     || StringAt(original, 0, 4, "SAN ", ""))
736     {
737     if (((current == 0)
738     && (GetAt(original, current + 4) == ' '))
739     || StringAt(original, 0, 4, "SAN ", ""))
740     {
741     MetaphAdd(primary, "H");
742     MetaphAdd(secondary, "H");
743     }
744     else
745     {
746     MetaphAdd(primary, "J");
747     MetaphAdd(secondary, "H");
748     }
749     current += 1;
750     break;
751     }
752    
753     if ((current == 0)
754     && !StringAt(original, current, 4, "JOSE", ""))
755     {
756     MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */
757     MetaphAdd(secondary, "A");
758     }
759     else
760     {
761     /* spanish pron. of e.g. 'bajador' */
762     if (IsVowel(original, current - 1)
763     && !SlavoGermanic(original)
764     && ((GetAt(original, current + 1) == 'A')
765     || (GetAt(original, current + 1) == 'O')))
766     {
767     MetaphAdd(primary, "J");
768     MetaphAdd(secondary, "H");
769     }
770     else
771     {
772     if (current == last)
773     {
774     MetaphAdd(primary, "J");
775     MetaphAdd(secondary, "");
776     }
777     else
778     {
779     if (!StringAt(original, (current + 1), 1, "L", "T",
780     "K", "S", "N", "M", "B", "Z", "")
781     && !StringAt(original, (current - 1), 1,
782     "S", "K", "L", ""))
783     {
784     MetaphAdd(primary, "J");
785     MetaphAdd(secondary, "J");
786     }
787     }
788     }
789     }
790    
791     if (GetAt(original, current + 1) == 'J') /* it could happen! */
792     current += 2;
793     else
794     current += 1;
795     break;
796    
797     case 'K':
798     if (GetAt(original, current + 1) == 'K')
799     current += 2;
800     else
801     current += 1;
802     MetaphAdd(primary, "K");
803     MetaphAdd(secondary, "K");
804     break;
805    
806     case 'L':
807     if (GetAt(original, current + 1) == 'L')
808     {
809     /* spanish e.g. 'cabrillo', 'gallegos' */
810     if (((current == (length - 3))
811     && StringAt(original, (current - 1), 4, "ILLO",
812     "ILLA", "ALLE", ""))
813     || ((StringAt(original, (last - 1), 2, "AS", "OS", "")
814     || StringAt(original, last, 1, "A", "O", ""))
815     && StringAt(original, (current - 1), 4, "ALLE", "")))
816     {
817     MetaphAdd(primary, "L");
818     MetaphAdd(secondary, "");
819     current += 2;
820     break;
821     }
822     current += 2;
823     }
824     else
825     current += 1;
826     MetaphAdd(primary, "L");
827     MetaphAdd(secondary, "L");
828     break;
829    
830     case 'M':
831     if ((StringAt(original, (current - 1), 3, "UMB", "")
832     && (((current + 1) == last)
833     || StringAt(original, (current + 2), 2, "ER", "")))
834     /* 'dumb','thumb' */
835     || (GetAt(original, current + 1) == 'M'))
836     current += 2;
837     else
838     current += 1;
839     MetaphAdd(primary, "M");
840     MetaphAdd(secondary, "M");
841     break;
842    
843     case 'N':
844     if (GetAt(original, current + 1) == 'N')
845     current += 2;
846     else
847     current += 1;
848     MetaphAdd(primary, "N");
849     MetaphAdd(secondary, "N");
850     break;
851    
852     case 'Ñ':
853     current += 1;
854     MetaphAdd(primary, "N");
855     MetaphAdd(secondary, "N");
856     break;
857    
858     case 'P':
859     if (GetAt(original, current + 1) == 'H')
860     {
861     MetaphAdd(primary, "F");
862     MetaphAdd(secondary, "F");
863     current += 2;
864     break;
865     }
866    
867     /* also account for "campbell", "raspberry" */
868     if (StringAt(original, (current + 1), 1, "P", "B", ""))
869     current += 2;
870     else
871     current += 1;
872     MetaphAdd(primary, "P");
873     MetaphAdd(secondary, "P");
874     break;
875    
876     case 'Q':
877     if (GetAt(original, current + 1) == 'Q')
878     current += 2;
879     else
880     current += 1;
881     MetaphAdd(primary, "K");
882     MetaphAdd(secondary, "K");
883     break;
884    
885     case 'R':
886     /* french e.g. 'rogier', but exclude 'hochmeier' */
887     if ((current == last)
888     && !SlavoGermanic(original)
889     && StringAt(original, (current - 2), 2, "IE", "")
890     && !StringAt(original, (current - 4), 2, "ME", "MA", ""))
891     {
892     MetaphAdd(primary, "");
893     MetaphAdd(secondary, "R");
894     }
895     else
896     {
897     MetaphAdd(primary, "R");
898     MetaphAdd(secondary, "R");
899     }
900    
901     if (GetAt(original, current + 1) == 'R')
902     current += 2;
903     else
904     current += 1;
905     break;
906    
907     case 'S':
908     /* special cases 'island', 'isle', 'carlisle', 'carlysle' */
909     if (StringAt(original, (current - 1), 3, "ISL", "YSL", ""))
910     {
911     current += 1;
912     break;
913     }
914    
915     /* special case 'sugar-' */
916     if ((current == 0)
917     && StringAt(original, current, 5, "SUGAR", ""))
918     {
919     MetaphAdd(primary, "X");
920     MetaphAdd(secondary, "S");
921     current += 1;
922     break;
923     }
924    
925     if (StringAt(original, current, 2, "SH", ""))
926     {
927     /* germanic */
928     if (StringAt
929     (original, (current + 1), 4, "HEIM", "HOEK", "HOLM",
930     "HOLZ", ""))
931     {
932     MetaphAdd(primary, "S");
933     MetaphAdd(secondary, "S");
934     }
935     else
936     {
937     MetaphAdd(primary, "X");
938     MetaphAdd(secondary, "X");
939     }
940     current += 2;
941     break;
942     }
943    
944     /* italian & armenian */
945     if (StringAt(original, current, 3, "SIO", "SIA", "")
946     || StringAt(original, current, 4, "SIAN", ""))
947     {
948     if (!SlavoGermanic(original))
949     {
950     MetaphAdd(primary, "S");
951     MetaphAdd(secondary, "X");
952     }
953     else
954     {
955     MetaphAdd(primary, "S");
956     MetaphAdd(secondary, "S");
957     }
958     current += 3;
959     break;
960     }
961    
962     /* german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
963     also, -sz- in slavic language altho in hungarian it is pronounced 's' */
964     if (((current == 0)
965     && StringAt(original, (current + 1), 1, "M", "N", "L", "W", ""))
966     || StringAt(original, (current + 1), 1, "Z", ""))
967     {
968     MetaphAdd(primary, "S");
969     MetaphAdd(secondary, "X");
970     if (StringAt(original, (current + 1), 1, "Z", ""))
971     current += 2;
972     else
973     current += 1;
974     break;
975     }
976    
977     if (StringAt(original, current, 2, "SC", ""))
978     {
979     /* Schlesinger's rule */
980     if (GetAt(original, current + 2) == 'H')
981     {
982     /* dutch origin, e.g. 'school', 'schooner' */
983     if (StringAt(original, (current + 3), 2, "OO", "ER", "EN",
984     "UY", "ED", "EM", ""))
985     {
986     /* 'schermerhorn', 'schenker' */
987     if (StringAt(original, (current + 3), 2, "ER", "EN", ""))
988     {
989     MetaphAdd(primary, "X");
990     MetaphAdd(secondary, "SK");
991     }
992     else
993     {
994     MetaphAdd(primary, "SK");
995     MetaphAdd(secondary, "SK");
996     }
997     current += 3;
998     break;
999     }
1000     else
1001     {
1002     if ((current == 0) && !IsVowel(original, 3)
1003     && (GetAt(original, 3) != 'W'))
1004     {
1005     MetaphAdd(primary, "X");
1006     MetaphAdd(secondary, "S");
1007     }
1008     else
1009     {
1010     MetaphAdd(primary, "X");
1011     MetaphAdd(secondary, "X");
1012     }
1013     current += 3;
1014     break;
1015     }
1016    
1017     if (StringAt(original, (current + 2), 1, "I", "E", "Y", ""))
1018     {
1019     MetaphAdd(primary, "S");
1020     MetaphAdd(secondary, "S");
1021     current += 3;
1022     break;
1023     }
1024     /* else */
1025     MetaphAdd(primary, "SK");
1026     MetaphAdd(secondary, "SK");
1027     current += 3;
1028     break;
1029     }
1030     }
1031    
1032     /* french e.g. 'resnais', 'artois' */
1033     if ((current == last)
1034     && StringAt(original, (current - 2), 2, "AI", "OI", ""))
1035     {
1036     MetaphAdd(primary, "");
1037     MetaphAdd(secondary, "S");
1038     }
1039     else
1040     {
1041     MetaphAdd(primary, "S");
1042     MetaphAdd(secondary, "S");
1043     }
1044    
1045     if (StringAt(original, (current + 1), 1, "S", "Z", ""))
1046     current += 2;
1047     else
1048     current += 1;
1049     break;
1050    
1051     case 'T':
1052     if (StringAt(original, current, 4, "TION", ""))
1053     {
1054     MetaphAdd(primary, "X");
1055     MetaphAdd(secondary, "X");
1056     current += 3;
1057     break;
1058     }
1059    
1060     if (StringAt(original, current, 3, "TIA", "TCH", ""))
1061     {
1062     MetaphAdd(primary, "X");
1063     MetaphAdd(secondary, "X");
1064     current += 3;
1065     break;
1066     }
1067    
1068     if (StringAt(original, current, 2, "TH", "")
1069     || StringAt(original, current, 3, "TTH", ""))
1070     {
1071     /* special case 'thomas', 'thames' or germanic */
1072     if (StringAt(original, (current + 2), 2, "OM", "AM", "")
1073     || StringAt(original, 0, 4, "VAN ", "VON ", "")
1074     || StringAt(original, 0, 3, "SCH", ""))
1075     {
1076     MetaphAdd(primary, "T");
1077     MetaphAdd(secondary, "T");
1078     }
1079     else
1080     {
1081     MetaphAdd(primary, "0");
1082     MetaphAdd(secondary, "T");
1083     }
1084     current += 2;
1085     break;
1086     }
1087    
1088     if (StringAt(original, (current + 1), 1, "T", "D", ""))
1089     current += 2;
1090     else
1091     current += 1;
1092     MetaphAdd(primary, "T");
1093     MetaphAdd(secondary, "T");
1094     break;
1095    
1096     case 'V':
1097     if (GetAt(original, current + 1) == 'V')
1098     current += 2;
1099     else
1100     current += 1;
1101     MetaphAdd(primary, "F");
1102     MetaphAdd(secondary, "F");
1103     break;
1104    
1105     case 'W':
1106     /* can also be in middle of word */
1107     if (StringAt(original, current, 2, "WR", ""))
1108     {
1109     MetaphAdd(primary, "R");
1110     MetaphAdd(secondary, "R");
1111     current += 2;
1112     break;
1113     }
1114    
1115     if ((current == 0)
1116     && (IsVowel(original, current + 1)
1117     || StringAt(original, current, 2, "WH", "")))
1118     {
1119     /* Wasserman should match Vasserman */
1120     if (IsVowel(original, current + 1))
1121     {
1122     MetaphAdd(primary, "A");
1123     MetaphAdd(secondary, "F");
1124     }
1125     else
1126     {
1127     /* need Uomo to match Womo */
1128     MetaphAdd(primary, "A");
1129     MetaphAdd(secondary, "A");
1130     }
1131     }
1132    
1133     /* Arnow should match Arnoff */
1134     if (((current == last) && IsVowel(original, current - 1))
1135     || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY",
1136     "OWSKI", "OWSKY", "")
1137     || StringAt(original, 0, 3, "SCH", ""))
1138     {
1139     MetaphAdd(primary, "");
1140     MetaphAdd(secondary, "F");
1141     current += 1;
1142     break;
1143     }
1144    
1145     /* polish e.g. 'filipowicz' */
1146     if (StringAt(original, current, 4, "WICZ", "WITZ", ""))
1147     {
1148     MetaphAdd(primary, "TS");
1149     MetaphAdd(secondary, "FX");
1150     current += 4;
1151     break;
1152     }
1153    
1154     /* else skip it */
1155     current += 1;
1156     break;
1157    
1158     case 'X':
1159     /* french e.g. breaux */
1160     if (!((current == last)
1161     && (StringAt(original, (current - 3), 3, "IAU", "EAU", "")
1162     || StringAt(original, (current - 2), 2, "AU", "OU", ""))))
1163     {
1164     MetaphAdd(primary, "KS");
1165     MetaphAdd(secondary, "KS");
1166     }
1167    
1168    
1169     if (StringAt(original, (current + 1), 1, "C", "X", ""))
1170     current += 2;
1171     else
1172     current += 1;
1173     break;
1174    
1175     case 'Z':
1176     /* chinese pinyin e.g. 'zhao' */
1177     if (GetAt(original, current + 1) == 'H')
1178     {
1179     MetaphAdd(primary, "J");
1180     MetaphAdd(secondary, "J");
1181     current += 2;
1182     break;
1183     }
1184     else if (StringAt(original, (current + 1), 2, "ZO", "ZI", "ZA", "")
1185     || (SlavoGermanic(original)
1186     && ((current > 0)
1187     && GetAt(original, current - 1) != 'T')))
1188     {
1189     MetaphAdd(primary, "S");
1190     MetaphAdd(secondary, "TS");
1191     }
1192     else
1193     {
1194     MetaphAdd(primary, "S");
1195     MetaphAdd(secondary, "S");
1196     }
1197    
1198     if (GetAt(original, current + 1) == 'Z')
1199     current += 2;
1200     else
1201     current += 1;
1202     break;
1203    
1204     default:
1205     current += 1;
1206     }
1207     /* printf("PRIMARY: %s\n", primary->str);
1208     printf("SECONDARY: %s\n", secondary->str); */
1209     }
1210    
1211    
1212     if (primary->length > 4)
1213     SetAt(primary, 4, '\0');
1214    
1215     if (secondary->length > 4)
1216     SetAt(secondary, 4, '\0');
1217    
1218     *codes = primary->str;
1219     *++codes = secondary->str;
1220    
1221     DestroyMetaString(original);
1222     DestroyMetaString(primary);
1223     DestroyMetaString(secondary);
1224     }
1225    

  ViewVC Help
Powered by ViewVC 1.1.22