1 |
adcroft |
1.1 |
/* |
2 |
|
|
|
3 |
|
|
Simple lexical analyser for finding and extracting "definition statements". This |
4 |
|
|
analyser finds "definition statements" within Fortran |
5 |
|
|
comment blocks. An analyser to find "definition statements" |
6 |
|
|
in other languages would look very similar. |
7 |
|
|
A "definition statement" consists of a name_list |
8 |
|
|
block and a description block, separated by a delimiter. |
9 |
|
|
A single definition statement can have formats of the style |
10 |
|
|
|
11 |
|
|
name_list :: description |
12 |
|
|
|
13 |
|
|
-or- |
14 |
|
|
|
15 |
|
|
name_list, :: description |
16 |
|
|
name_list :: description |
17 |
|
|
:: description |
18 |
|
|
|
19 |
|
|
-or- |
20 |
|
|
|
21 |
|
|
name_list, :: description |
22 |
|
|
name_list, :: description |
23 |
|
|
name_list :: |
24 |
|
|
|
25 |
|
|
-or- |
26 |
|
|
|
27 |
|
|
name_list :: description |
28 |
|
|
:: description |
29 |
|
|
|
30 |
|
|
The name_list entries to the left of the ' ::' delimiter are the definition statements |
31 |
|
|
name_list block. The description entries to the right of the ' ::' delimiter are the |
32 |
|
|
definition statements description block. |
33 |
|
|
|
34 |
|
|
description is text up to a new line. |
35 |
|
|
name_list is a list of one or more names, separated by white space or other |
36 |
|
|
punctuation (-,+,*,etc....). A name is a variable name, CPP symbol, procedure name |
37 |
|
|
etc... |
38 |
|
|
The :: delimiter must have at least one blank preceding it. This is required so |
39 |
|
|
that the C++ :: syntax for delimiting objects in a class is not interpreted as a definition |
40 |
|
|
statement. |
41 |
|
|
|
42 |
|
|
These statements are used in structured commentary to provide |
43 |
|
|
definition documentation of variables, procedures etc... that can be extracted |
44 |
|
|
automatically for use in code synopsis documents and for use in code analysis. |
45 |
|
|
|
46 |
|
|
This lexer works with Fortran [CcDd] in column one comments. |
47 |
|
|
A very similar lexer could be used for C and C++ style one-line |
48 |
|
|
and multi-line comments, as well as for Fortran ! comments. |
49 |
|
|
|
50 |
|
|
Grammar that has to be recognised and processed is |
51 |
|
|
|
52 |
|
|
1. COMMENT_STARTED name_list DESCRIP_DELIM descrip_text EOL |
53 |
|
|
2. COMMENT_STARTED name_list COMMA DESCRIP_DELIM descrip_text EOL |
54 |
|
|
3. COMMENT_STARTED DESCRIP_DELIM descrip_text EOL |
55 |
|
|
4. COMMENT_STARTED name_list EOL |
56 |
|
|
5. COMMENT_STARTED name_list COMMA EOL |
57 |
|
|
6. COMMENT_STARTED EOL |
58 |
|
|
|
59 |
|
|
1. This is recognised as a terminal record defining a list of variables |
60 |
|
|
associated with accumulated descrip_text. The description can continue |
61 |
|
|
over subsequent lines, but if there is a name_list on a subsequent line |
62 |
|
|
it will be assumed to be the start of another definition statement. |
63 |
|
|
|
64 |
|
|
2. This is recognised as an intermediate record defining a list of variables |
65 |
|
|
associated with accumulated descrip_text. The name_list COMMA syntax is |
66 |
|
|
the only way to indicate that a name_list spanning several lines in a file |
67 |
|
|
shares a single description block. |
68 |
|
|
|
69 |
|
|
3. This is recognised as a continued line of description for the |
70 |
|
|
immediately preceding variable list. |
71 |
|
|
|
72 |
|
|
4.,5.,6. These are recognised as valid grammar and are ignored. |
73 |
|
|
|
74 |
|
|
NAME_LIST and DESCRIP_TEXT pass strings to the parser. Other states |
75 |
|
|
just set line number, character number and token id. |
76 |
|
|
|
77 |
|
|
*/ |
78 |
|
|
|
79 |
|
|
/* Raise the limits of the internal tables so that they do not overflow */
%p 100000
%o 100000
%n 100000
%k 100000
%e 100000
/* Exclusive start conditions used by the rules section */
%x COMMENT_STARTED IN_DESCRIP NOT_A_COMMENT
%option noyywrap
89 |
|
|
|
90 |
|
|
/* Things we will match */
/* NAME: one or more letters/underscores, then letters, digits, '_' or '.' */
NAME   [_a-zA-Z]+[_a-zA-Z0-9.]*
/* DSPLIT: the "::" description delimiter. At least one blank must precede
   it, so that an unspaced "::" (for example C++ scope syntax such as
   Class::member) is not taken for a definition statement. */
DSPLIT [ ]+::
COMMA  ,
94 |
|
|
|
95 |
|
|
%{
/* Debug hook: expands to nothing, so every DP(...) statement is compiled out */
#define DP(a)
#include <stdio.h>
#include <stdlib.h>   /* exit() is called from the rule actions */
#include <string.h>   /* strdup() is called from the rule actions */
#include "Getcomm.tab.h"
/* Line and character no. counter */
int Lno=1;int Cno=1;
/* Zero until the first-call initialisation at the top of the rules has run */
int call1Getcommlex = 0;
#include "GLOBALS.h"
%}
105 |
|
|
|
106 |
|
|
|
107 |
|
|
%%

    /* One-time set-up, guarded by call1Getcommlex: enter the
       NOT_A_COMMENT start condition and clear the name / description
       counters and open flags. */
    if ( !call1Getcommlex ) {
        call1Getcommlex = 1;
        BEGIN(NOT_A_COMMENT);
        namecount     = 0;
        descriptcount = 0;
        name_open     = 0;
        descript_open = 0;
    }
117 |
|
|
|
118 |
|
|
|
119 |
|
|
<NOT_A_COMMENT>^[CcDd] {
    /* A comment starts: in Fortran this is a C, c, D or d in column one.
       C-style and ! style comments could be handled in the same way.
       ACTION: switch to COMMENT_STARTED and return COMMENT_START. */
    Cno += Getcommleng;
    DP(fprintf(stdout,"<COMMENT_START>\n");)
    BEGIN(COMMENT_STARTED);
    return COMMENT_START;
}
125 |
|
|
|
126 |
|
|
<COMMENT_STARTED>{NAME} {
    /* In the start of a comment any word could be a name that is part of
       a definition statement.
       ACTION: push a copy of the matched text onto namearr, increment
       namecount and return the NAME token. */
    char *name_copy;

    Cno += Getcommleng;
    DP(fprintf(stdout,"<NAME>\n");)
    DP(fprintf(stdout,"%s",Getcommtext);)
    DP(fprintf(stdout,"\n<\\NAME>\n");)

    /* strdup() returns NULL when memory is exhausted; never store that */
    name_copy = strdup(Getcommtext);
    if ( name_copy == NULL ) {
        fflush(stdout);
        fprintf(stderr,"out of memory while copying a name in Getcomm.lex\n");
        exit(-1);
    }
    namearr[namecount] = name_copy;
    ++namecount;

    /* Stop as soon as the table is full so the next name cannot overrun it */
    if ( namecount == MAX_NAMEARR ) {
        fflush(stdout);
        fprintf(stderr,"namecount == %d, need to increase MAX_NAMEARR in Getcomm.lex\n", namecount);
        exit(-1);
    }
    return(NAME);
}
140 |
|
|
|
141 |
|
|
<COMMENT_STARTED>{DSPLIT} {
    /* Description separator found.
       ACTION: change context to IN_DESCRIP and return DESCRIP_DELIM. */
    Cno += Getcommleng;
    DP(fprintf(stdout,"<DESCRIP_DELIM>\n");)
    BEGIN(IN_DESCRIP);
    return DESCRIP_DELIM;
}
147 |
|
|
|
148 |
|
|
<COMMENT_STARTED>{COMMA} {
    /* Commas are matched specially because they are used to find
       extended lists of variable names.
       ACTION: return the COMMA token. */
    Cno += Getcommleng;
    DP(fprintf(stdout,"<COMMA>\n");)
    return COMMA;
}
153 |
|
|
|
154 |
|
|
<COMMENT_STARTED>\n {
    /* End of a comment line reached before any description delimiter.
       ACTION: start a new line, go back to NOT_A_COMMENT, return EOL. */
    ++Lno;
    Cno = 1;
    DP(fprintf(stdout,"\n<\\COMMENT_START>\n");)
    BEGIN(NOT_A_COMMENT);
    return EOL;
}
161 |
|
|
|
162 |
|
|
<COMMENT_STARTED>[ ] {
    /* A single blank inside the name part of a comment.
       ACTION: advance the character counter past the blank (it must not
       be reset to 1, the line has not ended) and return SPACE. */
    Cno += Getcommleng;
    return(SPACE);
}
167 |
|
|
|
168 |
|
|
<COMMENT_STARTED>. {
    /* Any other single character inside the name part of a comment.
       ACTION: advance the character counter past it (it must not be
       reset to 1, the line has not ended) and return PUNCT. */
    Cno += Getcommleng;
    return(PUNCT);
}
173 |
|
|
|
174 |
|
|
<IN_DESCRIP>. {
    /* One character of description text (this rule matches a single
       character at a time).
       ACTION: push a copy of the matched text onto descriparr, increment
       descriptcount and return a token.
       NOTE(review): the original comment announced a "DESCRIPT" token but
       the code returns PUNCT; the returned token is left unchanged here.
       Confirm against the grammar in the parser source. */
    char *text_copy;

    Cno += Getcommleng;
    DP(fprintf(stdout,"<DESCRIP_TEXT>\n");)
    DP(fprintf(stdout,"%s",Getcommtext);)
    DP(fprintf(stdout,"\n<\\DESCRIP_TEXT>\n");)

    /* strdup() returns NULL when memory is exhausted; never store that */
    text_copy = strdup(Getcommtext);
    if ( text_copy == NULL ) {
        fflush(stdout);
        fprintf(stderr,"out of memory while copying description text in Getcomm.lex\n");
        exit(-1);
    }
    descriparr[descriptcount] = text_copy;
    ++descriptcount;

    /* Stop as soon as the table is full so the next entry cannot overrun it */
    if ( descriptcount == MAX_DESCRIPARR ) {
        fflush(stdout);
        fprintf(stderr,"descriptcount == %d, need to increase MAX_DESCRIPARR in Getcomm.lex\n", descriptcount);
        exit(-1);
    }
    return(PUNCT);
}
188 |
|
|
|
189 |
|
|
<IN_DESCRIP>\n {
    /* End of a description line.
       ACTION: start a new line, go back to NOT_A_COMMENT, return EOL.
       The character counter restarts at 1, as in the other newline
       rules, instead of continuing to grow across lines. */
    Lno = Lno + 1;
    Cno = 1;
    BEGIN(NOT_A_COMMENT);
    DP(fprintf(stdout,"<EOL>\n");)
    DP(fprintf(stdout,"<\\COMMENT_START>\n");)
    return(EOL);
}
197 |
|
|
|
198 |
|
|
<NOT_A_COMMENT>\n {
    /* ACTION: skip; only the line / character counters are updated */
    Cno = 1;
    ++Lno;
}

<NOT_A_COMMENT>. {
    /* ACTION: skip; only the character counter is advanced */
    Cno += Getcommleng;
}
206 |
|
|
|
207 |
|
|
. {
    /* Rule without a start condition.
       ACTION: skip; only the character counter is advanced */
    Cno += Getcommleng;
}

\n {
    /* Rule without a start condition.
       ACTION: skip; only the line / character counters are updated */
    Cno = 1;
    ++Lno;
}
214 |
|
|
%% |
215 |
|
|
|
216 |
|
|
/* NOTE(review): presumably the parser's debug switch (bison's yydebug under
   the Getcomm prefix) - confirm against the parser source. */
int Getcommdebug;

/* Parser entry point, defined in the generated parser source. Declared
   here so that the call below is not an implicit declaration. */
extern int Getcommparse(void);

/* Program entry point: run the parser and use its result as the exit status. */
int main(void)
{
    return Getcommparse();
}