/*

Simple lexical analyser for finding and extracting "definition statements". This
analyser finds "definition statements" within Fortran
comment blocks. An analyser to find "definition statements"
in other languages would look very similar.
A "definition statement" consists of a name_list
block and a description block, separated by a delimiter.
A single definition statement can have formats of the style

   name_list  :: description

      -or-

   name_list, :: description
   name_list  :: description
              :: description

      -or-

   name_list, :: description
   name_list, :: description
   name_list  ::

      -or-

   name_list  :: description
              :: description

The name_list entries to the left of the ' ::' delimiter are the definition statement's
name_list block. The description entries to the right of the ' ::' delimiter are the
definition statement's description block.

description is text up to a new line.
name_list is a list of one or more names, separated by white space or other
punctuation (-,+,*,etc....). A name is a variable name, CPP symbol, procedure name
etc...
The :: delimiter must have at least one blank preceding it. This is required so
that the C++ :: syntax for delimiting objects in a class is not interpreted as a definition
statement.

These statements are used in structured commentary to provide
definition documentation of variables, procedures etc... that can be extracted
automatically for use in code synopsis documents and for use in code analysis.

This lexer works with Fortran [CcDd] in column one comments.
A very similar lexer could be used for C and C++ style one-line
and multi-line comments, as well as for Fortran ! comments.

Grammar that has to be recognised and processed is

 1. COMMENT_STARTED name_list DESCRIP_DELIM descrip_text EOL
 2. COMMENT_STARTED name_list COMMA DESCRIP_DELIM descrip_text EOL
 3. COMMENT_STARTED DESCRIP_DELIM descrip_text EOL
 4. COMMENT_STARTED name_list EOL
 5. COMMENT_STARTED name_list COMMA EOL
 6. COMMENT_STARTED EOL

 1. This is recognised as a terminal record defining a list of variables
    associated with accumulated descrip_text. The description can continue
    over subsequent lines, but if there is a name_list on a subsequent line
    it will be assumed to be the start of another definition statement.

 2. This is recognised as an intermediate record defining a list of variables
    associated with accumulated descrip_text. The name_list COMMA syntax is
    the only way to indicate that a name_list spanning several lines in a file
    shares a single description block.

 3. This is recognised as a continued line of description for the
    immediately preceding variable list.

 4.,5.,6. These are recognised as valid grammar and are ignored.

NAME_LIST and DESCRIP_TEXT pass strings to the parser. Other states
just set line number, character number and token id.

*/

/* Set limits for internal tables so they don't overflow */
%p 100000
%o 100000
%n 100000
%k 100000
%e 100000

/* The scanner never needs a follow-on input file. */
%option noyywrap

/*
 * Exclusive start conditions:
 *   COMMENT_STARTED - just after the comment marker; names, commas and the
 *                     '::' delimiter are looked for here
 *   IN_DESCRIP      - after the '::' delimiter, up to the end of the line
 *   NOT_A_COMMENT   - anywhere else; input is skipped
 */
%x COMMENT_STARTED IN_DESCRIP NOT_A_COMMENT

/*
 * Things we will match.
 *   NAME   - a letter or underscore followed by letters, digits,
 *            underscores or dots
 *   DSPLIT - the '::' delimiter together with any blanks in front of it
 *   COMMA  - a literal comma
 * NOTE(review): DSPLIT accepts zero blanks before '::', whereas the file
 * header says at least one blank is required - confirm which is intended.
 */
NAME    [_a-zA-Z][_a-zA-Z0-9.]*
DSPLIT  " "*"::"
COMMA   ","

%{
/*
 * C prologue copied verbatim into the generated scanner.
 *
 * DP() wraps the debug trace statements used in the rule actions.  It is
 * defined empty here, so those statements expand to nothing.
 */
#define DP(a)

/* Standard headers: fprintf/fflush, exit and strdup are used by the actions. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Token ids shared with the parser. */
#include "Getcomm.tab.h"

/* Line and character no. counter */
int Lno = 1;
int Cno = 1;

/* Zero until the scanner has been entered once; see the code after %%. */
int call1Getcommlex = 0;

#include "GLOBALS.h"
%}


%%

    /*
     * Scanner entry code.  It runs each time the scanning routine is
     * entered, but the guard lets the body execute only once: the name and
     * description bookkeeping is cleared and the scanner is placed in the
     * NOT_A_COMMENT start condition.
     */
    if ( !call1Getcommlex ) {
        call1Getcommlex = 1;
        namecount       = 0;
        descriptcount   = 0;
        name_open       = 0;
        descript_open   = 0;
        BEGIN(NOT_A_COMMENT);
    }


<NOT_A_COMMENT>^[CcDd]  {
    /*
     * A comment starts: in Fortran this is a C, c, D or d in column one.
     * (C-style and ! style comments could be handled the same way.)
     * ACTION: step the column counter over the marker, switch to the
     * COMMENT_STARTED state and return the COMMENT_START token.
     */
    BEGIN(COMMENT_STARTED);
    Cno += Getcommleng;
    DP(fprintf(stdout,"<COMMENT_START>\n");)
    return COMMENT_START;
}

<COMMENT_STARTED>{NAME}  {
    /*
     * In the start of a comment any word could be a name that is part of a
     * definition statement.
     * ACTION: push a heap copy of the matched text onto the namearr stack,
     * increment the stack counter and return the NAME token.  The copy is
     * not released in this rule.
     * The scanner stops with an error if the copy cannot be allocated or
     * if the stack counter reaches MAX_NAMEARR.
     */
    char *name_copy = strdup(Getcommtext);

    Cno += Getcommleng;
    DP(fprintf(stdout,"<NAME>\n");)
    DP(fprintf(stdout,"%s",Getcommtext);)
    DP(fprintf(stdout,"\n<\\NAME>\n");)

    /* strdup returns NULL when memory is exhausted; never store that. */
    if ( name_copy == NULL ) {
        fflush(stdout);
        fprintf(stderr,"strdup failed for name in Getcomm.lex\n");
        exit(-1);
    }

    namearr[namecount] = name_copy;
    ++namecount;
    if ( namecount == MAX_NAMEARR ) {
        fflush(stdout);
        fprintf(stderr,"namecount == %d, need to increase MAX_NAMEARR in Getcomm.lex\n", namecount);
        exit(-1);
    }
    return(NAME);
}

<COMMENT_STARTED>{DSPLIT}  {
    /*
     * Description separator found: everything that follows on this line
     * is description text.
     * ACTION: advance the column counter, switch to the IN_DESCRIP state
     * and return the DESCRIP_DELIM token.
     */
    DP(fprintf(stdout,"<DESCRIP_DELIM>\n");)
    Cno += Getcommleng;
    BEGIN(IN_DESCRIP);
    return DESCRIP_DELIM;
}

<COMMENT_STARTED>{COMMA}  {
    /*
     * Commas are matched specially because they are used to find extended
     * lists of variable names.
     * ACTION: advance the column counter and return the COMMA token.
     */
    DP(fprintf(stdout,"<COMMA>\n");)
    Cno += Getcommleng;
    return COMMA;
}

<COMMENT_STARTED>\n  {
    /*
     * End of a comment line in which no '::' delimiter was seen.
     * ACTION: count the line, restart the column counter, go back to the
     * NOT_A_COMMENT state and return the EOL token.
     */
    BEGIN(NOT_A_COMMENT);
    ++Lno;
    Cno = 1;
    DP(fprintf(stdout,"\n<\\COMMENT_START>\n");)
    return EOL;
}

<COMMENT_STARTED>[ ]  {
    /*
     * A single blank in the name_list part of a comment.
     * ACTION: advance the column counter past the blank (it must not be
     * reset here, or column tracking is lost for the rest of the line)
     * and return the SPACE token.
     */
    Cno += Getcommleng;
    return(SPACE);
}

<COMMENT_STARTED>.  {
    /*
     * Any other single character in the name_list part of a comment is
     * treated as punctuation.
     * ACTION: advance the column counter past the character (it must not
     * be reset here, or column tracking is lost for the rest of the line)
     * and return the PUNCT token.
     */
    Cno += Getcommleng;
    return(PUNCT);
}

<IN_DESCRIP>.  {
    /*
     * One character of description text.  Each character is pushed onto
     * the descriparr stack as its own heap-allocated string; the copy is
     * not released in this rule.
     * ACTION: advance the column counter, store the text, increment the
     * stack counter and return a token.
     * The scanner stops with an error if the copy cannot be allocated or
     * if the stack counter reaches MAX_DESCRIPARR.
     * NOTE(review): the token returned is PUNCT although the trace output
     * and earlier commentary speak of description text - confirm that
     * PUNCT is what the parser expects here.
     */
    char *text_copy = strdup(Getcommtext);

    Cno += Getcommleng;
    DP(fprintf(stdout,"<DESCRIP_TEXT>\n");)
    DP(fprintf(stdout,"%s",Getcommtext);)
    DP(fprintf(stdout,"\n<\\DESCRIP_TEXT>\n");)

    /* strdup returns NULL when memory is exhausted; never store that. */
    if ( text_copy == NULL ) {
        fflush(stdout);
        fprintf(stderr,"strdup failed for description text in Getcomm.lex\n");
        exit(-1);
    }

    descriparr[descriptcount] = text_copy;
    ++descriptcount;
    if ( descriptcount == MAX_DESCRIPARR ) {
        fflush(stdout);
        fprintf(stderr,"descriptcount == %d, need to increase MAX_DESCRIPARR in Getcomm.lex\n", descriptcount);
        exit(-1);
    }
    return(PUNCT);
}

<IN_DESCRIP>\n  {
    /*
     * End of a line of description text.
     * ACTION: count the line, restart the column counter at 1 for the
     * next line (as the other newline rules do), go back to the
     * NOT_A_COMMENT state and return the EOL token.
     */
    Lno = Lno + 1;
    Cno = 1;
    BEGIN(NOT_A_COMMENT);
    DP(fprintf(stdout,"<EOL>\n");)
    DP(fprintf(stdout,"<\\COMMENT_START>\n");)
    return(EOL);
}

<NOT_A_COMMENT>\n  {
    /* Outside a comment: count the line, restart the column, skip. */
    Cno = 1;
    ++Lno;
}

<NOT_A_COMMENT>.  {
    /* Outside a comment: step over the character and skip it. */
    Cno += Getcommleng;
}

.  {
    /* Default start condition: step over the character and skip it. */
    Cno += Getcommleng;
}

\n  {
    /* Default start condition: count the line, restart the column, skip. */
    Cno = 1;
    ++Lno;
}
%%

/*
 * NOTE(review): presumably the parser's debug-trace switch under the
 * Getcomm prefix - confirm against the generated parser.
 */
int Getcommdebug;

/* Parser entry point, defined in the generated parser source. */
int Getcommparse(void);

/*
 * Program entry point: run the parser over the scanner's input and hand
 * the parser's status back to the caller as the exit status.
 */
int main(void)
{
    return Getcommparse();
}