1 |
/* |
2 |
|
3 |
Simple lexical analyser for finding and extracting "definition statements". This |
4 |
analyser finds "definition statements" within Fortran |
5 |
comment blocks. An analyser to find "definition statements" |
6 |
in other languages would look very similar. |
7 |
A "definition statement" consists of a name_list |
8 |
block and a description block, separated by a delimiter. |
9 |
A single definition statement can have formats of the style |
10 |
|
11 |
name_list :: description |
12 |
|
13 |
-or- |
14 |
|
15 |
name_list, :: description |
16 |
name_list :: description |
17 |
:: description |
18 |
|
19 |
-or- |
20 |
|
21 |
name_list, :: description |
22 |
name_list, :: description |
23 |
name_list :: |
24 |
|
25 |
-or- |
26 |
|
27 |
name_list :: description |
28 |
:: description |
29 |
|
30 |
The name_list entries to the left of the ' ::' delimiter are the definition statement's |
31 |
name_list block. The description entries to the right of the ' ::' delimiter are the |
32 |
definition statement's description block. |
33 |
|
34 |
description is text up to a new line. |
35 |
name_list is a list of one or more names, separated by white space or other |
36 |
punctuation (-,+,*,etc....). A name is a variable name, CPP symbol, procedure name |
37 |
etc... |
38 |
The :: delimiter must have at least one blank preceding it. This is required so |
39 |
that the C++ :: syntax for delimiting objects in a class is not interpreted as a definition |
40 |
statement. |
41 |
|
42 |
These statements are used in structured commentary to provide |
43 |
definition documentation of variables, procedures etc... that can be extracted |
44 |
automatically for use in code synopsis documents and for use in code analysis. |
45 |
|
46 |
This lexer works with Fortran [CcDd] in column one comments. |
47 |
A very similar lexer could be used for C and C++ style one-line |
48 |
and multi-line comments, as well as for Fortran ! comments. |
49 |
|
50 |
Grammar that has to be recognised and processed is |
51 |
|
52 |
1. COMMENT_STARTED name_list DESCRIP_DELIM descrip_text EOL |
53 |
2. COMMENT_STARTED name_list COMMA DESCRIP_DELIM descrip_text EOL |
54 |
3. COMMENT_STARTED DESCRIP_DELIM descrip_text EOL |
55 |
4. COMMENT_STARTED name_list EOL |
56 |
5. COMMENT_STARTED name_list COMMA EOL |
57 |
6. COMMENT_STARTED EOL |
58 |
|
59 |
1. This is recognised as a terminal record defining a list of variables |
60 |
associated with accumulated descrip_text. The description can continue |
61 |
over subsequent lines, but if there is a name_list on a subsequent line |
62 |
it will be assumed to be the start of another definition statement. |
63 |
|
64 |
2. This is recognised as an intermediate record defining a list of variables |
65 |
associated with accumulated descrip_text. The name_list COMMA syntax is |
66 |
the only way to indicate that a name_list spanning several lines in a file |
67 |
shares a single description block. |
68 |
|
69 |
3. This is recognised as a continued line of description for the |
70 |
immediately preceding variable list. |
71 |
|
72 |
4.,5.,6. These are recognised as valid grammar and are ignored. |
73 |
|
74 |
NAME_LIST and DESCRIP_TEXT pass strings to the parser. Other states |
75 |
just set line number, character number and token id. |
76 |
|
77 |
*/ |
78 |
|
79 |
/* Raise the sizes of the generator's internal tables so that they do not
   overflow while the scanner is being built. */
%p 100000
%o 100000
%n 100000
%k 100000
%e 100000

/* Exclusive start conditions used by the rules:
     COMMENT_STARTED - inside a comment line, before any "::" delimiter
     IN_DESCRIP      - inside a comment line, after the "::" delimiter
     NOT_A_COMMENT   - everywhere else                                   */
%x COMMENT_STARTED IN_DESCRIP NOT_A_COMMENT

/* The scanner stops at end of input instead of calling a wrap routine. */
%option noyywrap
89 |
|
90 |
/* Named patterns referenced by the rules. */

/* NAME: one or more letters or underscores, optionally followed by
   letters, digits, underscores or dots. */
NAME    [_a-zA-Z]+[_a-zA-Z0-9.]*

/* DSPLIT: the "::" description delimiter. At least one blank must come
   before the "::" so that a C++ style scope operator written directly
   after a name (name::member) is not taken for a definition statement. */
DSPLIT  [ ]+::

/* COMMA: marks a name_list that continues on the following line. */
COMMA   ,
94 |
|
95 |
%{
/* Debug-print hook. It expands to nothing, so every DP(...) statement in
   the rule actions disappears at compile time. */
#define DP(a)

/* Standard headers: stdio for fprintf/fflush, stdlib for exit,
   string for strdup. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* NOTE(review): presumably supplies the token codes returned by the rule
   actions (COMMENT_START, NAME, EOL, ...) - confirm against the parser. */
#include "Getcomm.tab.h"

/* Line and character no. counter */
int Lno = 1;
int Cno = 1;

/* Zero until the scanner has been entered once; the start-up code at the
   top of the rules section sets it to 1. */
int call1Getcommlex = 0;

/* NOTE(review): presumably declares namearr, descriparr, the associated
   counters and the MAX_* limits used by the actions - confirm. */
#include "GLOBALS.h"
%}
105 |
|
106 |
|
107 |
%% |
108 |
|
109 |
%{
    /* Start-up code placed at the top of the scanning routine. The guard
       flag makes the body execute on the first entry only: the scanner is
       put into the NOT_A_COMMENT state and the name/description
       bookkeeping is cleared. */
    if ( !call1Getcommlex ) {
        call1Getcommlex = 1;
        BEGIN(NOT_A_COMMENT);
        namecount     = 0;
        descriptcount = 0;
        name_open     = 0;
        descript_open = 0;
    }
%}
117 |
|
118 |
|
119 |
<NOT_A_COMMENT>^[CcDd] {
        /* A C, c, D or d at the start of a line opens a Fortran comment.
           (C-style and !-style comments could be handled the same way.)
           ACTION: advance the column counter, enter COMMENT_STARTED and
           return the COMMENT_START token. */
        BEGIN(COMMENT_STARTED);
        Cno += Getcommleng;
        DP(fprintf(stdout,"<COMMENT_START>\n");)
        return COMMENT_START;
}
125 |
|
126 |
<COMMENT_STARTED>{NAME} {
        /* Before the "::" delimiter any word may be a name belonging to a
           definition statement.
           ACTION: push a copy of the matched text onto the name stack,
           bump the stack counter and return the NAME token. */
        Cno += Getcommleng;
        DP(fprintf(stdout,"<NAME>\n");)
        DP(fprintf(stdout,"%s",Getcommtext);)
        DP(fprintf(stdout,"\n<\\NAME>\n");)

        namearr[namecount] = strdup(Getcommtext);
        if ( !namearr[namecount] ) {
            /* Out of memory: stop here rather than hand a null name on. */
            fflush(stdout);
            fprintf(stderr,"strdup failed while storing a name at line %d\n", Lno);
            exit(-1);
        }
        ++namecount;

        /* The scanner gives up as soon as the counter reaches the limit. */
        if ( namecount == MAX_NAMEARR ) {
            fflush(stdout);
            fprintf(stderr,"namecount == %d, need to increase MAX_NAMEARR in Getcomm.lex\n", namecount);
            exit(-1);
        }
        return(NAME);
}
140 |
|
141 |
<COMMENT_STARTED>{DSPLIT} {
        /* The description separator was found: everything that follows on
           this line is description text.
           ACTION: advance the column counter, switch to IN_DESCRIP and
           return the DESCRIP_DELIM token. */
        BEGIN(IN_DESCRIP);
        Cno += Getcommleng;
        DP(fprintf(stdout,"<DESCRIP_DELIM>\n");)
        return DESCRIP_DELIM;
}
147 |
|
148 |
<COMMENT_STARTED>{COMMA} {
        /* Commas get their own token because they are how a list of
           variable names is extended over several lines.
           ACTION: advance the column counter and return COMMA. */
        Cno += Getcommleng;
        DP(fprintf(stdout,"<COMMA>\n");)
        return COMMA;
}
153 |
|
154 |
<COMMENT_STARTED>\n {
        /* The comment line ended before any "::" delimiter was seen.
           ACTION: move to column 1 of the next line, leave the comment
           state and return the EOL token. */
        ++Lno;
        Cno = 1;
        DP(fprintf(stdout,"\n<\\COMMENT_START>\n");)
        BEGIN(NOT_A_COMMENT);
        return EOL;
}
161 |
|
162 |
<COMMENT_STARTED>[ ] {
        /* A single blank inside the name part of a comment line.
           ACTION: advance the column counter past the blank (it must not
           be reset, the scanner is still on the same line) and return
           the SPACE token. */
        Cno += Getcommleng;
        return(SPACE);
}
167 |
|
168 |
<COMMENT_STARTED>. {
        /* Any other single character inside the name part of a comment
           line (not a name, delimiter, comma, blank or newline).
           ACTION: advance the column counter past the character (it must
           not be reset, the scanner is still on the same line) and
           return the PUNCT token. */
        Cno += Getcommleng;
        return(PUNCT);
}
173 |
|
174 |
<IN_DESCRIP>. {
        /* One character of description text. The pattern matches a single
           character, so each character is stored as its own entry.
           ACTION: push a copy of the matched text onto the description
           stack, bump the stack counter and return a token.
           NOTE(review): the token returned is PUNCT although the debug
           output and the original comment speak of DESCRIP_TEXT/DESCRIPT;
           confirm against the parser before changing it. */
        Cno += Getcommleng;
        DP(fprintf(stdout,"<DESCRIP_TEXT>\n");)
        DP(fprintf(stdout,"%s",Getcommtext);)
        DP(fprintf(stdout,"\n<\\DESCRIP_TEXT>\n");)

        descriparr[descriptcount] = strdup(Getcommtext);
        if ( !descriparr[descriptcount] ) {
            /* Out of memory: stop here rather than hand null text on. */
            fflush(stdout);
            fprintf(stderr,"strdup failed while storing description text at line %d\n", Lno);
            exit(-1);
        }
        ++descriptcount;

        /* The scanner gives up as soon as the counter reaches the limit. */
        if ( descriptcount == MAX_DESCRIPARR ) {
            fflush(stdout);
            fprintf(stderr,"descriptcount == %d, need to increase MAX_DESCRIPARR in Getcomm.lex\n", descriptcount);
            exit(-1);
        }
        return(PUNCT);
}
188 |
|
189 |
<IN_DESCRIP>\n {
        /* End of a comment line that carried description text.
           ACTION: count the line, restart the column counter at 1 (as
           every other newline rule does), leave the description state
           and return the EOL token. */
        Lno = Lno + 1;
        Cno = 1;
        BEGIN(NOT_A_COMMENT);
        DP(fprintf(stdout,"<EOL>\n");)
        DP(fprintf(stdout,"<\\COMMENT_START>\n");)
        return(EOL);
}
197 |
|
198 |
<NOT_A_COMMENT>\n {
        /* Newline outside a comment: nothing is returned to the parser.
           ACTION: count the line and restart the column counter. */
        ++Lno;
        Cno = 1;
}
202 |
|
203 |
<NOT_A_COMMENT>. {
        /* Any character outside a comment is skipped.
           ACTION: advance the column counter only. */
        Cno += Getcommleng;
}
206 |
|
207 |
. {
        /* Fallback for the initial start condition: skip the character.
           ACTION: advance the column counter only. */
        Cno += Getcommleng;
}
210 |
|
211 |
\n {
        /* Fallback for the initial start condition: skip the newline.
           ACTION: count the line and restart the column counter. */
        ++Lno;
        Cno = 1;
}
214 |
%% |
215 |
|
216 |
/* NOTE(review): presumably the debug switch that the generated parser
   refers to - confirm against Getcomm.tab.c. */
int Getcommdebug;

/* Parser entry point; it is defined outside this file. */
int Getcommparse(void);

/* Program entry point: run the parser over the input once.
   The parser's status is deliberately not propagated, so the exit status
   stays 0 as it was when the original fell off the end of main. */
int main(void)
{
    (void)Getcommparse();
    return 0;
}