1/* Lexical analysis for genksyms.
2   Copyright 1996, 1997 Linux International.
3
4   New implementation contributed by Richard Henderson <rth@tamu.edu>
5   Based on original work by Bjorn Ekwall <bj0rn@blox.se>
6
7   Taken from Linux modutils 2.4.22.
8
9   This program is free software; you can redistribute it and/or modify it
10   under the terms of the GNU General Public License as published by the
11   Free Software Foundation; either version 2 of the License, or (at your
12   option) any later version.
13
14   This program is distributed in the hope that it will be useful, but
15   WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17   General Public License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software Foundation,
21   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
22
23
24%{
25
26#include <limits.h>
27#include <stdlib.h>
28#include <string.h>
29#include <ctype.h>
30
31#include "genksyms.h"
32#include "parse.tab.h"
33
34/* We've got a two-level lexer here.  We let flex do basic tokenization
35   and then we categorize those basic tokens in the second stage.  */
36#define YY_DECL		static int yylex1(void)
37
38%}
39
/* Identifiers.  '$' is accepted both as a first and as a following
   character, in addition to letters, digits and underscore.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal (which also covers a lone "0"), decimal or
   hexadecimal, optionally followed by one of the suffixes U, L, UL or
   LU in either case.
   NOTE(review): the long long suffixes (LL, ULL, ...) are not part of
   I_SUF, so e.g. "1ULL" is not matched as a single INT -- confirm
   whether that is intended.  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fraction with an optional exponent, or
   a digit sequence with a mandatory exponent; both forms take an
   optional F or L suffix in either case.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, with an optional wide prefix L.  The
   body is any mix of backslash-escaped characters and characters that
   are neither a backslash nor the closing quote.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Multi-character operators: any of ~ % ^ & * + = | < > / - followed
   by '=', plus && || -> << and >>.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

/* Ask flex not to generate the input() function.  */
%option noinput
62
%%


 /* Keep track of our location in the original source files.  A line of
    the form  # <int> "<file>" ...  is handed to the second stage as
    FILENAME; cur_line is not incremented here for such a line.  Any
    other line starting with '#', and every bare newline, just bumps
    cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Literals and identifiers.  Keywords are not distinguished here;
    everything matching IDENT is returned as IDENT.  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

 /* The ellipsis is longer than the single-character fallback below,
    so three consecutive dots are matched here.  */
"..."					return DOTS;

 /* All other tokens are single characters; the character itself is
    used as the token code.  */
.					return yytext[0];


%%
94
95/* Bring in the keyword recognizer.  */
96
97#include "keywords.hash.c"
98
99
/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) records one token in the phrase list:
 *
 *   - the spare node that `next_node' points at becomes `cur_node' and
 *     receives a freshly allocated copy of the L+1 bytes starting at T
 *     (so T[L] is expected to be the terminating NUL);
 *   - a new spare node is allocated, and its `next' field is pointed at
 *     the node just filled.
 *
 * The macro relies on `cur_node', `next_node' and `in_source_file' being
 * visible at the point of expansion.  Both arguments are parenthesized in
 * the expansion so that expression arguments behave as expected; note
 * that L is still evaluated more than once.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect will be that the ABI checksums become
 * more volatile, which is acceptable.  Also, such collisions are quite
 * rare; so far it was only observed in include/linux/telephony.h.
 */
#define _APP(T,L)	do {						   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string =				   \
			    memcpy(xmalloc((L)+1), (T), (L)+1);		   \
			  cur_node->tag =				   \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
			  cur_node->in_source_file = in_source_file;       \
			} while (0)

/* Append the text of the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)
123
124
125/* The second stage lexer.  Here we incorporate knowledge of the state
126   of the parser to tailor the tokens that are returned.  */
127
128int
129yylex(void)
130{
131  static enum {
132    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
133    ST_BRACKET, ST_BRACE, ST_EXPRESSION,
134    ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
135    ST_TABLE_5, ST_TABLE_6
136  } lexstate = ST_NOTSTARTED;
137
138  static int suppress_type_lookup, dont_want_brace_phrase;
139  static struct string_list *next_node;
140
141  int token, count = 0;
142  struct string_list *cur_node;
143
144  if (lexstate == ST_NOTSTARTED)
145    {
146      next_node = xmalloc(sizeof(*next_node));
147      next_node->next = NULL;
148      lexstate = ST_NORMAL;
149    }
150
151repeat:
152  token = yylex1();
153
154  if (token == 0)
155    return 0;
156  else if (token == FILENAME)
157    {
158      char *file, *e;
159
160      /* Save the filename and line number for later error messages.  */
161
162      if (cur_filename)
163	free(cur_filename);
164
165      file = strchr(yytext, '\"')+1;
166      e = strchr(file, '\"');
167      *e = '\0';
168      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
169      cur_line = atoi(yytext+2);
170
171      if (!source_file) {
172        source_file = xstrdup(cur_filename);
173        in_source_file = 1;
174      } else {
175        in_source_file = (strcmp(cur_filename, source_file) == 0);
176      }
177
178      goto repeat;
179    }
180
181  switch (lexstate)
182    {
183    case ST_NORMAL:
184      switch (token)
185	{
186	case IDENT:
187	  APP;
188	  {
189	    const struct resword *r = is_reserved_word(yytext, yyleng);
190	    if (r)
191	      {
192		switch (token = r->token)
193		  {
194		  case ATTRIBUTE_KEYW:
195		    lexstate = ST_ATTRIBUTE;
196		    count = 0;
197		    goto repeat;
198		  case ASM_KEYW:
199		    lexstate = ST_ASM;
200		    count = 0;
201		    goto repeat;
202		  case TYPEOF_KEYW:
203		    lexstate = ST_TYPEOF;
204		    count = 0;
205		    goto repeat;
206
207		  case STRUCT_KEYW:
208		  case UNION_KEYW:
209		  case ENUM_KEYW:
210		    dont_want_brace_phrase = 3;
211		    suppress_type_lookup = 2;
212		    goto fini;
213
214		  case EXPORT_SYMBOL_KEYW:
215		      goto fini;
216		  }
217	      }
218	    if (!suppress_type_lookup)
219	      {
220		if (find_symbol(yytext, SYM_TYPEDEF, 1))
221		  token = TYPE;
222	      }
223	  }
224	  break;
225
226	case '[':
227	  APP;
228	  lexstate = ST_BRACKET;
229	  count = 1;
230	  goto repeat;
231
232	case '{':
233	  APP;
234	  if (dont_want_brace_phrase)
235	    break;
236	  lexstate = ST_BRACE;
237	  count = 1;
238	  goto repeat;
239
240	case '=': case ':':
241	  APP;
242	  lexstate = ST_EXPRESSION;
243	  break;
244
245	case DOTS:
246	default:
247	  APP;
248	  break;
249	}
250      break;
251
252    case ST_ATTRIBUTE:
253      APP;
254      switch (token)
255	{
256	case '(':
257	  ++count;
258	  goto repeat;
259	case ')':
260	  if (--count == 0)
261	    {
262	      lexstate = ST_NORMAL;
263	      token = ATTRIBUTE_PHRASE;
264	      break;
265	    }
266	  goto repeat;
267	default:
268	  goto repeat;
269	}
270      break;
271
272    case ST_ASM:
273      APP;
274      switch (token)
275	{
276	case '(':
277	  ++count;
278	  goto repeat;
279	case ')':
280	  if (--count == 0)
281	    {
282	      lexstate = ST_NORMAL;
283	      token = ASM_PHRASE;
284	      break;
285	    }
286	  goto repeat;
287	default:
288	  goto repeat;
289	}
290      break;
291
292    case ST_TYPEOF:
293      switch (token)
294	{
295	case '(':
296	  if ( ++count == 1 )
297	    lexstate = ST_TYPEOF_1;
298	  else
299	    APP;
300	  goto repeat;
301	case ')':
302	  APP;
303	  if (--count == 0)
304	    {
305	      lexstate = ST_NORMAL;
306	      token = TYPEOF_PHRASE;
307	      break;
308	    }
309	  goto repeat;
310	default:
311	  APP;
312	  goto repeat;
313	}
314      break;
315
316    case ST_TYPEOF_1:
317      if (token == IDENT)
318	{
319	  if (is_reserved_word(yytext, yyleng)
320	      || find_symbol(yytext, SYM_TYPEDEF, 1))
321	    {
322	      yyless(0);
323	      unput('(');
324	      lexstate = ST_NORMAL;
325	      token = TYPEOF_KEYW;
326	      break;
327	    }
328	  _APP("(", 1);
329	}
330	APP;
331	lexstate = ST_TYPEOF;
332	goto repeat;
333
334    case ST_BRACKET:
335      APP;
336      switch (token)
337	{
338	case '[':
339	  ++count;
340	  goto repeat;
341	case ']':
342	  if (--count == 0)
343	    {
344	      lexstate = ST_NORMAL;
345	      token = BRACKET_PHRASE;
346	      break;
347	    }
348	  goto repeat;
349	default:
350	  goto repeat;
351	}
352      break;
353
354    case ST_BRACE:
355      APP;
356      switch (token)
357	{
358	case '{':
359	  ++count;
360	  goto repeat;
361	case '}':
362	  if (--count == 0)
363	    {
364	      lexstate = ST_NORMAL;
365	      token = BRACE_PHRASE;
366	      break;
367	    }
368	  goto repeat;
369	default:
370	  goto repeat;
371	}
372      break;
373
374    case ST_EXPRESSION:
375      switch (token)
376	{
377	case '(': case '[': case '{':
378	  ++count;
379	  APP;
380	  goto repeat;
381	case '}':
382	  /* is this the last line of an enum declaration? */
383	  if (count == 0)
384	    {
385	      /* Put back the token we just read so's we can find it again
386		 after registering the expression.  */
387	      unput(token);
388
389	      lexstate = ST_NORMAL;
390	      token = EXPRESSION_PHRASE;
391	      break;
392	    }
393	  /* FALLTHRU */
394	case ')': case ']':
395	  --count;
396	  APP;
397	  goto repeat;
398	case ',': case ';':
399	  if (count == 0)
400	    {
401	      /* Put back the token we just read so's we can find it again
402		 after registering the expression.  */
403	      unput(token);
404
405	      lexstate = ST_NORMAL;
406	      token = EXPRESSION_PHRASE;
407	      break;
408	    }
409	  APP;
410	  goto repeat;
411	default:
412	  APP;
413	  goto repeat;
414	}
415      break;
416
417    case ST_TABLE_1:
418      goto repeat;
419
420    case ST_TABLE_2:
421      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
422	{
423	  token = EXPORT_SYMBOL_KEYW;
424	  lexstate = ST_TABLE_5;
425	  APP;
426	  break;
427	}
428      lexstate = ST_TABLE_6;
429      /* FALLTHRU */
430
431    case ST_TABLE_6:
432      switch (token)
433	{
434	case '{': case '[': case '(':
435	  ++count;
436	  break;
437	case '}': case ']': case ')':
438	  --count;
439	  break;
440	case ',':
441	  if (count == 0)
442	    lexstate = ST_TABLE_2;
443	  break;
444	};
445      goto repeat;
446
447    case ST_TABLE_3:
448      goto repeat;
449
450    case ST_TABLE_4:
451      if (token == ';')
452	lexstate = ST_NORMAL;
453      goto repeat;
454
455    case ST_TABLE_5:
456      switch (token)
457	{
458	case ',':
459	  token = ';';
460	  lexstate = ST_TABLE_2;
461	  APP;
462	  break;
463	default:
464	  APP;
465	  break;
466	}
467      break;
468
469    default:
470      exit(1);
471    }
472fini:
473
474  if (suppress_type_lookup > 0)
475    --suppress_type_lookup;
476  if (dont_want_brace_phrase > 0)
477    --dont_want_brace_phrase;
478
479  yylval = &next_node->next;
480
481  return token;
482}
483